1 #include "machinepile.h"
6 #include "threadnotify.h"
8 #include "addUdpEnhance.h"
9 #include "addPrefetchEnhance.h"
17 #include "abortreaders.h"
24 #include <sys/select.h>
29 #define CONFIG_FILENAME "dstm.conf"
31 /* Thread transaction variables */
33 __thread objstr_t *t_cache;
34 __thread struct ___Object___ *revertlist;
37 __thread jmp_buf aborttrans;
40 /* Global Variables */
41 extern int classsize[];
42 pfcstats_t *evalPrefetch;
43 extern int numprefetchsites; //Global variable containing number of prefetch sites
44 extern pthread_mutex_t mainobjstore_mutex; // Mutex to lock main Object store
45 pthread_mutex_t prefetchcache_mutex; // Mutex to lock Prefetch Cache
46 pthread_mutexattr_t prefetchcache_mutex_attr; /* Attribute for lock to make it a recursive lock */
47 extern prehashtable_t pflookup; //Global Prefetch cache's lookup table
48 pthread_t wthreads[NUM_THREADS]; //Worker threads for working on the prefetch queue
49 pthread_t tPrefetch; /* Primary Prefetch thread that processes the prefetch queue */
50 extern objstr_t *mainobjstore;
51 unsigned int myIpAddr;
52 unsigned int *hostIpAddrs;
55 int myIndexInHostArray;
57 unsigned int waitThreadID;
59 unsigned int oidsPerBlock;
63 sockPoolHashTable_t *transReadSockPool;
64 sockPoolHashTable_t *transPrefetchSockPool;
65 sockPoolHashTable_t *transRequestSockPool;
66 pthread_mutex_t notifymutex;
67 pthread_mutex_t atomicObjLock;
69 /***********************************
70 * Global Variables for statistics
71 **********************************/
72 int numTransCommit = 0;
73 int numTransAbort = 0;
76 int nprehashSearch = 0;
84 /***********************************
85 * Global variables for Duplication
86 ***********************************/
89 int numLiveHostsInSystem;
90 int flipBit; // Used to distribute requests between primary and backup evenly
91 unsigned int *locateObjHosts;
95 unsigned int transIDMax;
96 unsigned int transIDMin;
97 unsigned int transIDIndex;
101 /******************************
102 * Global variables for Paxos
103 ******************************/
109 unsigned int origleader;
110 unsigned int temp_v_a;
114 void printhex(unsigned char *, int);
115 plistnode_t *createPiles();
116 plistnode_t *sortPiles(plistnode_t *pileptr);
118 /*******************************
119 * Send and Recv function calls
120 *******************************/
121 int send_data(int fd, void *buf, int buflen) {
122 char *buffer = (char *)(buf);
131 numbytes = send(fd, buffer, size, 0);
134 bytesSent += numbytes;
138 else if( numbytes < 0) {
139 // Send returned an error.
140 // Analyze underlying cause
142 printf("%s -> fd : %d errno = %d %s\n",__func__, fd, errno,strerror(errno));
145 if(errno == ECONNRESET || errno == EAGAIN || errno == EWOULDBLOCK) {
146 // machine has failed
148 // if we see EAGAIN w/o failures, we should record the time
149 // when we start send and finish send see if it is longer
150 // than our threshold
153 printf("%s -> EAGAIN : %s\n",__func__,(errno == EAGAIN)?"TRUE":"FALSE");
164 printf("%s -> Unexpected ERROR!\n",__func__);
170 // Case : numbytes == 0
171 // machine has failed -- this case probably doesn't occur in reality
177 printf("%s -> SHOULD NOT BE HERE\n",__func__);
182 } // close while loop
184 printf("%s-> Exiting\n", __func__);
187 return 0; // completed sending data
190 //Returns negative value if receive cannot be completed because of
191 //timeout or machine failure
193 int recv_data(int fd, void *buf, int buflen) {
194 char *buffer = (char *)(buf);
203 numbytes = recv(fd, buffer, size, 0);
210 else if (numbytes<0){
211 //Receive returned an error.
212 //Analyze underlying cause
214 printf("%s-> fd : %d errno = %d %s\n", __func__, fd, errno, strerror(errno));
216 if(errno == ECONNRESET || errno == EAGAIN || errno == EWOULDBLOCK) {
218 //if we see EAGAIN w/o failures, we should record the time
219 //when we start read and finish read and see if it is longer
222 printf("%s -> EAGAIN : %s\n",__func__,(errno == EAGAIN)?"TRUE":"FALSE");
224 if(errno == EAGAIN) {
227 printf("%s -> TRYcounter increases\n",__func__);
243 printf("%s -> Unexpected ERROR!\n",__func__);
244 printf("%s-> errno = %d %s\n", __func__, errno, strerror(errno));
250 //machine has failed -- this case probably doesn't occur in reality
253 printf("%s -> SHOULD NOT BE HERE\n",__func__);
260 printf("%s -> fd = %d Exiting\n",__func__,fd);
262 return 0; // got all the data
265 int recv_data_errorcode(int fd, void *buf, int buflen) {
267 printf("%s-> Start; fd:%d, buflen:%d\n", __func__, fd, buflen);
269 char *buffer = (char *)(buf);
273 numbytes = recv(fd, buffer, size, 0);
275 printf("%s-> numbytes: %d\n", __func__, numbytes);
279 else if (numbytes == -1) {
280 printf("%s -> ERROR NUMBER = %d %s\n",__func__,errno,strerror(errno));
281 perror("recv_data_errorcode");
289 printf("%s-> Exiting\n", __func__);
294 void printhex(unsigned char *ptr, int numBytes) {
296 for (i = 0; i < numBytes; i++) {
298 printf("0%x ", ptr[i]);
300 printf("%x ", ptr[i]);
306 inline int arrayLength(int *array) {
308 for(i=0 ; array[i] != -1; i++)
313 inline int findmax(int *array, int arraylength) {
316 for(i = 0; i < arraylength; i++) {
324 char* midtoIPString(unsigned int mid){
328 /* This function is a prefetch call generated by the compiler that
329 * populates the shared primary prefetch queue*/
330 void prefetch(int siteid, int ntuples, unsigned int *oids, unsigned short *endoffsets, short *arrayfields) {
331 /* Allocate for the queue node*/
332 int qnodesize = 2*sizeof(int) + ntuples * (sizeof(unsigned short) + sizeof(unsigned int)) + endoffsets[ntuples - 1] * sizeof(short);
334 char * node= getmemory(qnodesize);
335 int top=endoffsets[ntuples-1];
339 /* Set queue node values */
341 /* TODO: Remove this after testing */
342 evalPrefetch[siteid].callcount++;
344 *((int *)(node))=siteid;
345 *((int *)(node + sizeof(int))) = ntuples;
347 memcpy(node+len, oids, ntuples*sizeof(unsigned int));
348 memcpy(node+len+ntuples*sizeof(unsigned int), endoffsets, ntuples*sizeof(unsigned short));
349 memcpy(node+len+ntuples*(sizeof(unsigned int)+sizeof(short)), arrayfields, top*sizeof(short));
351 /* Lock and insert into primary prefetch queue */
355 /* This function starts up the transaction runtime. */
356 int dstmStartup(const char * option) {
357 pthread_t thread_Listen, udp_thread_Listen;
359 int master=option!=NULL && strcmp(option, "master")==0;
363 if (processConfigFile() != 0)
364 return 0; //TODO: return error value, cause main program to exit
371 printf("Trans stats is on\n");
378 //Initialize socket pool
379 transReadSockPool = createSockPool(transReadSockPool, DEFAULTSOCKPOOLSIZE);
380 transPrefetchSockPool = createSockPool(transPrefetchSockPool, DEFAULTSOCKPOOLSIZE);
381 transRequestSockPool = createSockPool(transRequestSockPool, DEFAULTSOCKPOOLSIZE);
387 pthread_attr_init(&attr);
388 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
391 pthread_create(&udp_thread_Listen, &attr, udpListenBroadcast, (void*)udpfd);
394 pthread_create(&thread_Listen, &attr, dstmListen, (void*)fd);
398 updateLiveHostsCommit();
400 if(!allHostsLive()) {
401 printf("Not all hosts live. Exiting.\n");
407 dstmListen((void *)fd);
412 //TODO Use this later
413 void *pCacheAlloc(objstr_t *store, unsigned int size) {
419 while(ptr->next != NULL) {
420 /* check if store is empty */
421 if(((unsigned int)ptr->top - (unsigned int)ptr - sizeof(objstr_t) + size) <= ptr->size) {
436 /* This function initiates the prefetch thread. A queue is shared
437 * between the main thread of execution and the prefetch thread to
438 * process the prefetch calls. Calls from the compiler populate the shared
439 * queue with prefetch requests while the prefetch thread processes the
440 * prefetch requests. */
443 //Create and initialize prefetch cache structure
446 if((evalPrefetch = initPrefetchStats()) == NULL) {
447 printf("%s() Error allocating memory at %s, %d\n", __func__, __FILE__, __LINE__);
452 /* Initialize attributes for mutex */
453 pthread_mutexattr_init(&prefetchcache_mutex_attr);
454 pthread_mutexattr_settype(&prefetchcache_mutex_attr, PTHREAD_MUTEX_RECURSIVE_NP);
456 pthread_mutex_init(&prefetchcache_mutex, &prefetchcache_mutex_attr);
457 pthread_mutex_init(&notifymutex, NULL);
458 pthread_mutex_init(&atomicObjLock, NULL);
460 //Create prefetch cache lookup table
461 if(prehashCreate(PHASH_SIZE, PLOADFACTOR)) {
466 //Initialize primary shared queue
468 //Initialize machine pile w/prefetch oids and offsets shared queue
471 //Create the primary prefetch thread
475 retval=pthread_create(&tPrefetch, NULL, transPrefetchNew, NULL);
479 retval=pthread_create(&tPrefetch, NULL, transPrefetch, NULL);
482 pthread_detach(tPrefetch);
486 /* This function stops the threads spawned */
490 pthread_cancel(tPrefetch);
491 for(t = 0; t < NUM_THREADS; t++)
492 pthread_cancel(wthreads[t]);
498 /* This function inserts random wait delays in the order of microseconds.
499 * Mostly used when transaction commits retry. */
506 req.tv_nsec = (long)(1000 + (t%10000)); //1-11 microsec
507 nanosleep(&req, NULL);
511 /* This function initializes things required in the transaction start*/
513 t_cache = objstrCreate(1048576);
514 t_chashCreate(CHASH_SIZE, CLOADFACTOR);
521 /*#define INLINE inline __attribute__((always_inline))
523 INLINE void * chashSearchI(chashtable_t *table, unsigned int key) {
524 //REMOVE HASH FUNCTION CALL TO MAKE SURE IT IS INLINED HERE
525 chashlistnode_t *node = &table->table[(key & table->mask)>>1];
528 if(node->key == key) {
532 } while(node != NULL);
540 /* This function finds the location of the objects involved in a transaction
541 * and returns the pointer to the object if found in a remote location */
542 __attribute__((pure)) objheader_t *transRead(unsigned int oid) {
543 unsigned int machinenumber;
544 objheader_t *tmp, *objheader;
545 objheader_t *objcopy;
548 chashlistnode_t *node;
554 node= &c_table[(oid & c_mask)>>1];
556 if(node->key == oid) {
561 return &((objheader_t*)node->val)[1];
567 } while(node != NULL);
571 if((objheader = chashSearchI(record->lookupTable, oid)) != NULL) {
576 return &objheader[1];
585 //abort this transaction
586 //printf("ABORTING\n");
587 removetransactionhash();
588 objstrDelete(t_cache);
590 _longjmp(aborttrans,1);
595 if ((objheader = (objheader_t *) mhashSearch(oid)) != NULL) {
599 /* Look up in machine lookup table and copy into cache*/
600 GETSIZE(size, objheader);
601 size += sizeof(objheader_t);
602 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
603 memcpy(objcopy, objheader, size);
604 /* Insert into cache's lookup table */
606 t_chashInsert(OID(objheader), objcopy);
614 if((tmp = (objheader_t *) prehashSearch(oid)) != NULL) {
618 /* Look up in prefetch cache */
620 size+=sizeof(objheader_t);
621 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
622 memcpy(objcopy, tmp, size);
623 /* Insert into cache's lookup table */
624 t_chashInsert(OID(tmp), objcopy);
632 /* Get the object from the remote location */
633 if((machinenumber = lhashSearch(oid)) == 0) {
634 printf("Error: %s() No machine found for oid =% %s,%dx\n",__func__, machinenumber, __FILE__, __LINE__);
637 objcopy = getRemoteObj(machinenumber, oid);
639 if(objcopy == NULL) {
640 printf("Error: Object not found in Remote location %s, %d\n", __FILE__, __LINE__);
656 /* This function finds the location of the objects involved in a transaction
657 * and returns the pointer to the object if found in a remote location */
658 __attribute__((pure)) objheader_t *transRead2(unsigned int oid) {
659 unsigned int machinenumber;
660 objheader_t *tmp, *objheader;
661 objheader_t *objcopy;
665 printf("%s-> Start, oid:%u\n", __func__, oid);
670 //abort this transaction
671 //printf("ABORTING\n");
672 removetransactionhash();
673 objstrDelete(t_cache);
675 _longjmp(aborttrans,1);
680 if ((objheader = (objheader_t *) mhashSearch(oid)) != NULL) {
682 printf("%s-> Grab from this machine\n", __func__);
687 /* Look up in machine lookup table and copy into cache*/
688 GETSIZE(size, objheader);
689 size += sizeof(objheader_t);
690 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
691 memcpy(objcopy, objheader, size);
692 /* Insert into cache's lookup table */
694 t_chashInsert(OID(objheader), objcopy);
702 , TYPE(header)if((tmp = (objheader_t *) prehashSearch(oid)) != NULL) {
706 /* Look up in prefetch cache */
708 size+=sizeof(objheader_t);
709 objcopy = (objheader_t *) objstrAlloc(&t_cache, size);
710 memcpy(objcopy, tmp, size);
711 /* Insert into cache's lookup table */
712 t_chashInsert(OID(tmp), objcopy);
720 /* Get the object from the remote location */
723 printf("%s-> Grab from remote machine\n", __func__);
727 unsigned int mindex = findHost(lhashSearch(oid));
728 machinenumber = locateObjHosts[2*mindex+flipBit];
730 if(numLiveHostsInSystem > 1)
736 printf("mindex:%d, oid:%d, machinenumber:%s\n", mindex, oid, midtoIPString(machinenumber));
739 if((machinenumber = lhashSearch(oid)) == 0) {
740 printf("Error: %s() No machine found for oid =% %s,%dx\n",__func__, machinenumber, __FILE__, __LINE__);
744 objcopy = getRemoteObj(machinenumber, oid);
747 restoreDuplicationState(machinenumber);
749 printf("%s -> Recall transRead2\n",__func__);
751 return transRead2(oid);
756 if(objcopy == NULL) {
757 printf("Error: Object not found in Remote location %s, %d\n", __FILE__, __LINE__);
770 printf("%s -> Finished!!\n",__func__);
774 /* This function creates objects in the transaction record */
775 objheader_t *transCreateObj(unsigned int size) {
776 objheader_t *tmp = (objheader_t *) objstrAlloc(&t_cache, (sizeof(objheader_t) + size));
777 OID(tmp) = getNewOID();
782 t_chashInsert(OID(tmp), tmp);
785 return &tmp[1]; //want space after object header
793 /* This function creates machine piles based on all machines involved in a
794 * transaction commit request */
795 plistnode_t *createPiles() {
798 plistnode_t *pile = NULL;
799 unsigned int machinenum;
800 unsigned int destMachine[2];
801 objheader_t *headeraddr;
802 chashlistnode_t * ptr = c_table;
803 /* Represents number of bins in the chash table */
804 unsigned int size = c_size;
806 for(i = 0; i < size ; i++) {
807 chashlistnode_t * curr = &ptr[i];
808 /* Inner loop to traverse the linked list of the cache lookupTable */
809 while(curr != NULL) {
810 //if the first bin in hash table is empty
813 headeraddr=(objheader_t *) curr->val;
816 oid = OID(headeraddr);
818 printf("%s-> oid:%u, version:%d, status:%d, type:%d\n", __func__, OID(headeraddr), headeraddr->version, STATUS(headeraddr), TYPE(headeraddr));
820 if (STATUS(headeraddr) & NEW) { // new/local object
821 printf("%s-> new/local object\n", __func__);
823 else if ((mhashSearch(curr->key) != NULL)) { //local/nonnew
824 if(STATUS(headeraddr) & DIRTY) { // modified
825 printf("%s-> old/local/mod object\n", __func__);
828 printf("%s-> old/local/read object\n", __func__);
831 else if ((machinenum = lhashSearch(curr->key)) != 0) { // remote/nonnew object
832 if(STATUS(headeraddr) & DIRTY) { //modified
833 printf("%s-> remote/local/mod object\n", __func__);
836 printf("%s-> remote/local/read object\n", __func__);
840 printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
843 unsigned int pmid = getPrimaryMachine(lhashSearch(oid));
844 unsigned int bmid = getBackupMachine(lhashSearch(oid));
845 printf("%s-> Primary Machine: [%s], ", __func__, midtoIPString(pmid));
846 printf("Backup Machine: [%s]\n", midtoIPString(bmid));
849 if(STATUS(headeraddr) & DIRTY || STATUS(headeraddr) & NEW) {
852 pile = pInsert(pile, headeraddr, getPrimaryMachine(lhashSearch(oid)), c_numelements);
855 STATUS(headeraddr) = DIRTY;
857 pile = pInsert(pile, headeraddr, getBackupMachine(lhashSearch(oid)), c_numelements);
859 // Get machine location for object id (and whether local or not)
860 if (STATUS(headeraddr) & NEW || (mhashSearch(curr->key) != NULL)) {
861 machinenum = myIpAddr;
862 } else if ((machinenum = lhashSearch(curr->key)) == 0) {
863 printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
866 //Make machine groups
867 pile = pInsert(pile, headeraddr, machinenum, c_numelements);
875 /* This function creates machine piles based on all machines involved in a
876 * transaction commit request */
877 plistnode_t *createPiles() {
879 plistnode_t *pile = NULL;
880 unsigned int machinenum;
881 unsigned int destMachine[2];
882 objheader_t *headeraddr;
883 struct chashentry * ptr = c_table;
884 /* Represents number of bins in the chash table */
885 unsigned int size = c_size;
887 for(i = 0; i < size ; i++) {
888 struct chashentry * curr = & ptr[i];
889 /* Inner loop to traverse the linked list of the cache lookupTable */
890 // if the first bin in hash table is empty
893 headeraddr=(objheader_t *) curr->ptr;
895 //Get machine location for object id (and whether local or not)
896 if (STATUS(headeraddr) & NEW || (mhashSearch(curr->key) != NULL)) {
897 machinenum = myIpAddr;
898 } else if ((machinenum = lhashSearch(curr->key)) == 0) {
899 printf("Error: No such machine %s, %d\n", __FILE__, __LINE__);
903 //Make machine groups
904 pile = pInsert(pile, headeraddr, machinenum, c_numelements);
910 /* This function initiates the transaction commit process.
911 * Spawns threads for each of the new connections with Participants
912 * and creates new piles by calling createPiles().
913 * Sends a transrequest() to each remote machine for objects found remotely
914 * and calls handleLocalReq() to process objects found locally. */
916 unsigned int tot_bytes_mod, *listmid;
917 plistnode_t *pile, *pile_ptr;
918 char treplyretry; /* keeps track of the common response that needs to be sent */
920 trans_commit_data_t transinfo; /* keeps track of objs locked during transaction */
924 unsigned int transID = getNewTransID();
927 printf("%s -> Starts transCommit\n",__func__);
932 //abort this transaction
934 * printf("ABORTING TRANSACTION AT COMMIT\n");
936 removetransactionhash();
937 objstrDelete(t_cache);
940 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
949 /* Look through all the objects in the transaction record and make piles
950 * for each machine involved in the transaction*/
952 pile_ptr = pile = createPiles();
953 pile_ptr = pile = sortPiles(pile);
958 /* Create the packet to be sent in TRANS_REQUEST */
960 /* Count the number of participants */
962 pilecount = pCount(pile);
964 /* Create a list of machine ids(Participants) involved in transaction */
965 listmid = calloc(pilecount, sizeof(unsigned int));
966 pListMid(pile, listmid);
968 /* Create a socket and getReplyCtrl array, initialize */
969 int socklist[pilecount];
971 for(loopcount = 0 ; loopcount < pilecount; loopcount++)
972 socklist[loopcount] = 0;
973 char getReplyCtrl[pilecount];
974 for(loopcount = 0 ; loopcount < pilecount; loopcount++)
975 getReplyCtrl[loopcount] = 0;
977 /* Process each machine pile */
979 int localReqsock = -1;
980 trans_req_data_t *tosend;
981 tosend = calloc(pilecount, sizeof(trans_req_data_t));
982 while(pile != NULL) {
984 printf("%s-> New pile:[%s],", __func__, midtoIPString(pile->mid));
985 printf(" myIp:[%s]\n", midtoIPString(myIpAddr));
987 tosend[sockindex].f.control = TRANS_REQUEST;
988 tosend[sockindex].f.mcount = pilecount;
989 tosend[sockindex].f.numread = pile->numread;
990 tosend[sockindex].f.nummod = pile->nummod;
991 tosend[sockindex].f.numcreated = pile->numcreated;
993 printf("%s-> numread:%d, nummod:%d, numcreated:%d\n", __func__, pile->numread, pile->nummod, pile->numcreated);
995 tosend[sockindex].f.sum_bytes = pile->sum_bytes;
996 tosend[sockindex].listmid = listmid;
997 tosend[sockindex].objread = pile->objread;
998 tosend[sockindex].oidmod = pile->oidmod;
999 tosend[sockindex].oidcreated = pile->oidcreated;
1001 if(pile->mid != myIpAddr) {
1002 if((sd = getSockWithLock(transRequestSockPool, pile->mid)) < 0) {
1003 printf("\ntransRequest(): socket create error\n");
1007 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1011 socklist[sockindex] = sd;
1012 /* Send bytes of data with TRANS_REQUEST control message */
1013 send_data(sd, &(tosend[sockindex].f), sizeof(fixed_data_t));
1015 /* Send list of machines involved in the transaction */
1017 int size=sizeof(unsigned int)*(tosend[sockindex].f.mcount);
1018 send_data(sd, tosend[sockindex].listmid, size);
1021 /* Send oids and version number tuples for objects that are read */
1023 int size=(sizeof(unsigned int)+sizeof(unsigned short))*(tosend[sockindex].f.numread);
1024 send_data(sd, tosend[sockindex].objread, size);
1027 /* Send objects that are modified */
1029 if((modptr = calloc(1, tosend[sockindex].f.sum_bytes)) == NULL) {
1030 printf("Calloc error for modified objects %s, %d\n", __FILE__, __LINE__);
1034 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1040 for(i = 0; i < tosend[sockindex].f.nummod ; i++) {
1042 objheader_t *headeraddr;
1043 if((headeraddr = t_chashSearch(tosend[sockindex].oidmod[i])) == NULL) {
1044 printf("%s() Error: No such oid %s, %d\n", __func__, __FILE__, __LINE__);
1049 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1053 GETSIZE(size,headeraddr);
1054 size+=sizeof(objheader_t);
1055 memcpy(modptr+offset, headeraddr, size);
1058 send_data(sd, modptr, tosend[sockindex].f.sum_bytes);
1061 /* send transaction id, number of machine involved, machine ids */
1062 send_data(sd, &transID, sizeof(unsigned int));
1065 } else { //handle request locally
1066 localReqsock = sockindex;
1067 handleLocalReq(&tosend[sockindex], &transinfo, &getReplyCtrl[sockindex]);
1071 } //end of pile processing
1073 /* Recv Ctrl msgs from all machines */
1075 printf("%s-> Finished sending transaction read/mod objects\n",__func__);
1079 for(i = 0; i < pilecount; i++) {
1080 if(i == localReqsock)
1082 int sd = socklist[i];
1085 int timeout; // a variable to check if the connection is still alive. if it is -1, then need to transcommit again
1086 timeout = recv_data(sd, &control, sizeof(char));
1087 //Update common data structure with new ctrl msg
1088 getReplyCtrl[i] = control;
1089 /* Recv Objects if participant sends TRANS_DISAGREE */
1090 //printf("getReplyCtrl[%d] = %d\n", i, (int)getReplyCtrl[i]);
1092 if(control == TRANS_DISAGREE) {
1094 timeout = recv_data(sd, &length, sizeof(int));
1096 pthread_mutex_lock(&prefetchcache_mutex);
1097 if ((newAddr = prefetchobjstrAlloc((unsigned int)length)) == NULL) {
1098 printf("Error: %s() objstrAlloc error for copying into prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1101 pthread_mutex_unlock(&prefetchcache_mutex);
1103 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1107 pthread_mutex_unlock(&prefetchcache_mutex);
1108 timeout = recv_data(sd, newAddr, length);
1110 while(length != 0) {
1111 unsigned int oidToPrefetch;
1112 objheader_t * header;
1113 header = (objheader_t *)(((char *)newAddr) + offset);
1114 oidToPrefetch = OID(header);
1117 GETSIZE(size, header);
1118 size += sizeof(objheader_t);
1119 //make an entry in prefetch hash table
1121 if((oldptr = prehashSearch(oidToPrefetch)) != NULL) {
1122 prehashRemove(oidToPrefetch);
1123 prehashInsert(oidToPrefetch, header);
1125 prehashInsert(oidToPrefetch, header);
1127 length = length - size;
1130 } //end of receiving objs
1136 printf("%s -> TIMEOUT!!!!!!!\n",__func__);
1139 deadmid = listmid[i];
1142 printf("%s -> Dead Machine ID : %s\n",__func__,midtoIPString(deadmid));
1143 printf("%s -> Dead SD : %d\n",__func__,sd);
1145 getReplyCtrl[i] = TRANS_DISAGREE;
1152 printf("%s-> Decide final response now\n", __func__);
1154 /* Decide the final response */
1155 if((finalResponse = decideResponse(getReplyCtrl, &treplyretry, pilecount)) == 0) {
1156 printf("Error: %s() in updating prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1160 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1165 printf("%s-> Final Response: %d\n", __func__, (int)finalResponse);
1168 /* Send responses to all machines */
1169 for(i = 0; i < pilecount; i++) {
1170 int sd = socklist[i];
1175 if(finalResponse == TRANS_COMMIT) {
1177 /* Update prefetch cache */
1178 if((retval = updatePrefetchCache(&(tosend[i]))) != 0) {
1179 printf("Error: %s() in updating prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
1183 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1188 /* Invalidate objects in other machine cache */
1189 if(tosend[i].f.nummod > 0) {
1190 if((retval = invalidateObj(&(tosend[i]))) != 0) {
1191 printf("Error: %s() in invalidating Objects %s, %d\n", __func__, __FILE__, __LINE__);
1195 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1201 removetransaction(tosend[i].oidmod,tosend[i].f.nummod);
1202 removethisreadtransaction(tosend[i].objread, tosend[i].f.numread);
1206 else if (!treplyretry) {
1207 removethistransaction(tosend[i].oidmod,tosend[i].f.nummod);
1208 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
1212 send_data(sd,&finalResponse,sizeof(char));
1214 printf("%s -> Decision Sent to %s\n",__func__,midtoIPString(listmid[i]));
1218 /* Complete local processing */
1220 thashInsert(transID,finalResponse);
1222 doLocalProcess(finalResponse, &(tosend[i]), &transinfo);
1225 if(finalResponse == TRANS_COMMIT) {
1226 removetransaction(tosend[i].oidmod,tosend[i].f.nummod);
1227 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
1228 } else if (!treplyretry) {
1229 removethistransaction(tosend[i].oidmod,tosend[i].f.nummod);
1230 removethisreadtransaction(tosend[i].objread,tosend[i].f.numread);
1236 removetransaction(tosend[i].oidmod,tosend[i].f.nummod);
1237 removethisreadtransaction(tosend[i].objread, tosend[i].f.numread);
1244 printf("%s-> Free sockets\n", __func__);
1246 for(i = 0; i < pilecount; i++) {
1247 if(socklist[i] != 0) {
1248 freeSockWithLock(transRequestSockPool, listmid[i], socklist[i]);
1252 /* Free resources */
1257 /* wait a random amount of time before retrying to commit transaction*/
1264 } while (treplyretry && deadmid != -1);
1266 if(finalResponse == TRANS_ABORT) {
1271 /* Free Resources */
1272 objstrDelete(t_cache);
1275 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1278 if(deadmid != -1) { /* if deadmid is greater than or equal to 0,
1279 then there is dead machine. */
1281 printf("%s -> Dead machine Detected : %s\n",__func__,midtoIPString(deadmid));
1283 restoreDuplicationState(deadmid);
1285 printf("%s -> Duplication completed\n",__func__);
1290 } else if(finalResponse == TRANS_COMMIT) {
1294 /* Free Resources */
1295 objstrDelete(t_cache);
1298 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1302 //TODO Add other cases
1303 printf("Error: in %s() THIS SHOULD NOT HAPPEN.....EXIT PROGRAM\n", __func__);
1305 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1310 printf("%s-> End, line:%d\n\n", __func__, __LINE__);
1315 /* This function handles the local objects involved in a transaction
1316 * commit process. It also decides whether this local machine
1317 * sends AGREE, DISAGREE or SOFT_ABORT to the coordinator */
1318 void handleLocalReq(trans_req_data_t *tdata, trans_commit_data_t *transinfo, char *getReplyCtrl) {
1319 unsigned int *oidnotfound = NULL, *oidlocked = NULL;
1320 int numoidnotfound = 0, numoidlocked = 0;
1321 int v_nomatch = 0, v_matchlock = 0, v_matchnolock = 0;
1324 unsigned short version;
1326 /* Counters and arrays to formulate decision on control message to be sent */
1327 oidnotfound = (unsigned int *) calloc((tdata->f.numread + tdata->f.nummod), sizeof(unsigned int));
1328 oidlocked = (unsigned int *) calloc((tdata->f.numread + tdata->f.nummod +1), sizeof(unsigned int)); // calloc additional 1 byte for
1329 //setting a divider between read and write locks
1330 numread = tdata->f.numread;
1331 /* Process each oid in the machine pile/ group per thread */
1332 for (i = 0; i < tdata->f.numread + tdata->f.nummod; i++) {
1333 if (i < tdata->f.numread) {
1334 int incr = sizeof(unsigned int) + sizeof(unsigned short); // Offset that points to next position in the objread array
1336 oid = *((unsigned int *)(((char *)tdata->objread) + incr));
1337 version = *((unsigned short *)(((char *)tdata->objread) + incr + sizeof(unsigned int)));
1338 commitCountForObjRead(getReplyCtrl, oidnotfound, oidlocked, &numoidnotfound, &numoidlocked, &v_nomatch, &v_matchlock, &v_matchnolock, oid, version);
1339 } else { // Objects Modified
1340 if(i == tdata->f.numread) {
1341 oidlocked[numoidlocked++] = -1;
1344 objheader_t *headptr;
1345 headptr = (objheader_t *) t_chashSearch(tdata->oidmod[i-numread]);
1346 if (headptr == NULL) {
1347 printf("Error: handleLocalReq() returning NULL, no such oid %s, %d\n", __FILE__, __LINE__);
1351 version = headptr->version;
1352 commitCountForObjMod(getReplyCtrl, oidnotfound, oidlocked, &numoidnotfound, &numoidlocked, &v_nomatch, &v_matchlock, &v_matchnolock, oid, version);
1356 /* Fill out the trans_commit_data_t data structure. This is required for a trans commit process
1357 * if Participant receives a TRANS_COMMIT */
1358 transinfo->objlocked = oidlocked;
1359 transinfo->objnotfound = oidnotfound;
1360 transinfo->modptr = NULL;
1361 transinfo->numlocked = numoidlocked;
1362 transinfo->numnotfound = numoidnotfound;
1364 /* Condition to send TRANS_AGREE */
1365 if(v_matchnolock == tdata->f.numread + tdata->f.nummod) {
1367 printf("%s -> TRANS_AGREE\n",__func__);
1369 *getReplyCtrl = TRANS_AGREE;
1371 /* Condition to send TRANS_SOFT_ABORT */
1372 if((v_matchlock > 0 && v_nomatch == 0) || (numoidnotfound > 0 && v_nomatch == 0)) {
1374 printf("%s -> TRANS_SOFT_ABORT\n",__func__);
1376 *getReplyCtrl = TRANS_SOFT_ABORT;
1380 void doLocalProcess(char finalResponse, trans_req_data_t *tdata, trans_commit_data_t *transinfo) {
1382 if(finalResponse == TRANS_ABORT) {
1383 if(transAbortProcess(transinfo) != 0) {
1384 printf("Error in transAbortProcess() %s,%d\n", __FILE__, __LINE__);
1388 } else if(finalResponse == TRANS_COMMIT) {
1390 /* Invalidate objects in other machine cache */
1391 if(tdata->f.nummod > 0) {
1393 if((retval = invalidateObj(tdata)) != 0) {
1394 printf("Error: %s() in invalidating Objects %s, %d\n", __func__, __FILE__, __LINE__);
1399 if(transComProcess(tdata, transinfo) != 0) {
1400 printf("Error in transComProcess() %s,%d\n", __FILE__, __LINE__);
1405 printf("ERROR...No Decision\n");
1409 if (transinfo->objlocked != NULL) {
1410 free(transinfo->objlocked);
1412 if (transinfo->objnotfound != NULL) {
1413 free(transinfo->objnotfound);
1417 /* This function decides the response that needs to be sent to
1418 * all Participant machines after the TRANS_REQUEST protocol */
1419 char decideResponse(char *getReplyCtrl, char *treplyretry, int pilecount) {
1420 int i, transagree = 0, transdisagree = 0, transsoftabort = 0; /* Counters to formulate decision of what
1422 for (i = 0 ; i < pilecount; i++) {
1424 control = getReplyCtrl[i];
1428 printf("%s-> Participant sent unknown message, i:%d, Control: %d\n", __func__, i, (int)control);
1431 /* treat as disagree, pass thru */
1432 case TRANS_DISAGREE:
1435 printf("%s-> Participant sent TRANS_DISAGREE, i:%d, Control: %d\n", __func__, i, (int)control);
1442 printf("%s-> Participant sent TRANS_AGREE, i:%d, Control: %d\n", __func__, i, (int)control);
1446 case TRANS_SOFT_ABORT:
1449 printf("%s-> Participant sent TRANS_SOFT_ABORT, i:%d, Control: %d\n", __func__, i, (int)control);
1455 if(transdisagree > 0) {
1460 /* clear objects from prefetch cache */
1463 } else if(transagree == pilecount) {
1466 return TRANS_COMMIT;
1468 /* Send Abort in the soft abort case, followed by retrying to commit the transaction */
1475 /* This function opens a connection, places an object read request to
1476 * the remote machine, reads the control message and object if
1477 * available and copies the object and its header to the local
// Parameters: mnum is the machine to ask, oid is the object id requested.
// Visible flow: take a socket from transReadSockPool, send READ_REQUEST plus
// the oid, read a one-byte control reply; on OBJECT_FOUND read an int size,
// allocate that many bytes from the thread-local t_cache, receive the object
// into it, register it with t_chashInsert and add size to totalObjSize.
// NOTE(review): the error paths and the return statements are not shown in
// this listing; objcopy starts as NULL, so presumably NULL is what a failed
// request yields - confirm.
1480 void *getRemoteObj(unsigned int mnum, unsigned int oid) {
1482 struct sockaddr_in serv_addr;
1486 void *objcopy = NULL;
1491 if((sd = getSock2(transReadSockPool, mnum)) != -1) {
// Request layout: one control byte followed by the oid.
1492 char readrequest[sizeof(char)+sizeof(unsigned int)];
1493 readrequest[0] = READ_REQUEST;
// NOTE(review): this stores an unsigned int at offset 1 of a char array
// through a cast pointer, which is an unaligned store; confirm every target
// platform tolerates it.
1494 *((unsigned int *)(&readrequest[1])) = oid;
1495 send_data(sd, readrequest, sizeof(readrequest));
1498 printf("%s -> creating socket error\n",__func__);
1501 /* Read response from the Participant */
1502 if(recv_data(sd, &control, sizeof(char)) < 0) {
1507 if (control==OBJECT_NOT_FOUND) {
1509 } else if(control==OBJECT_FOUND) {
1511 /* Read object if found into local cache */
1513 if(recv_data(sd, &size, sizeof(int)) < 0) {
// NOTE(review): no check of the objstrAlloc result is visible before the
// buffer is handed to recv_data.
1518 objcopy = objstrAlloc(&t_cache, size);
1520 if(recv_data(sd, objcopy, size) < 0) {
1527 /* Insert into cache's lookup table */
1528 t_chashInsert(oid, objcopy);
1530 totalObjSize += size;
1537 /* ask machines if they received decision */
// Asks the machines in listmid[0..nummid-1], one after another, whether they
// know the outcome of transaction transID: sends ASK_COMMIT followed by the
// transaction id and reads a one-byte response.
// Returns TRANS_ABORT when response is still -1 at the end, otherwise the
// response byte that was received.
// NOTE(review): this listing omits lines of the function (declarations of i
// and response, closing braces). In the visible lines freeSock is reached only
// when the loop moves on to the next machine; confirm the socket is also
// released on the break path.
1538 char receiveDecisionFromBackup(unsigned int transID,int nummid,unsigned int *listmid)
1541 printf("%s -> Entering\n",__func__);
1544 int sd; // socket id
1548 for(i = 0; i < nummid; i++) {
1549 if((sd = getSock(transReadSockPool, listmid[i])) < 0) {
// Fixed: the %s conversion had no matching argument, which is undefined
// behavior; __func__ is passed as in the other messages of this function.
1550 printf("%s -> socket Error!!\n", __func__);
1553 char control = ASK_COMMIT;
1555 send_data(sd,&control, sizeof(char));
1556 send_data(sd,&transID, sizeof(unsigned int));
1558 // return -1 if it didn't receive the response
1559 int timeout = recv_data(sd,&response, sizeof(char));
1562 if(timeout == 0 || response > 0)
1563 break; // received response
1565 // else check next machine
1566 freeSock(transReadSockPool, listmid[i],sd);
1570 printf("%s -> response : %d\n",__func__,response);
1573 return (response==-1)?TRANS_ABORT:response;
// Reacts to the loss of machine deadHost.
// Visible flow: consult liveHosts for deadHost; compare deadHost with leader;
// when this machine is the leader, run duplicateLostObjects(deadHost) and
// updateLiveHostsCommit() with leaderFixing_mutex taken around parts of the
// work; otherwise send REMOTE_RESTORE_DUPLICATED_STATE plus deadHost to the
// leader over a socket from transPrefetchSockPool.
// NOTE(review): this listing omits lines of the function, so what is done
// while leaderFixing_mutex is held and what happens when deadHost == leader
// cannot be stated from here.
// NOTE(review): findHost is compared against -1 elsewhere in this file; if it
// returns -1 here, liveHosts[findHost(deadHost)] reads outside the array.
1578 void restoreDuplicationState(unsigned int deadHost) {
1582 if(!liveHosts[findHost(deadHost)]) {
1587 if(deadHost == leader)
1591 printf("%s-> leader?:%s, me?:%s\n", __func__, midtoIPString(leader), (myIpAddr == leader)?"LEADER":"NOT LEADER");
// Leader path: the repair is performed locally.
1594 if(leader == myIpAddr) {
1595 pthread_mutex_lock(&leaderFixing_mutex);
1598 pthread_mutex_unlock(&leaderFixing_mutex);
1600 if(!liveHosts[findHost(deadHost)]) {
1602 printf("%s -> already fixed\n",__func__);
1604 pthread_mutex_lock(&leaderFixing_mutex);
1606 pthread_mutex_unlock(&leaderFixing_mutex);
1610 duplicateLostObjects(deadHost);
1612 if(updateLiveHostsCommit() != 0) {
1613 printf("%s -> error updateLiveHostsCommit()\n",__func__);
1616 pthread_mutex_lock(&leaderFixing_mutex);
1618 pthread_mutex_unlock(&leaderFixing_mutex);
1622 pthread_mutex_unlock(&leaderFixing_mutex);
1624 printf("%s (REMOTE_RESTORE_DUPLICATED_STATE -> LEADER is already fixing\n",__func__);
// Non-leader path: forward the request to the leader.
1630 if((sd = getSockWithLock(transPrefetchSockPool, leader)) < 0) {
1631 printf("%s -> socket create error\n",__func__);
1634 ctrl = REMOTE_RESTORE_DUPLICATED_STATE;
1635 send_data(sd, &ctrl, sizeof(char));
1636 send_data(sd, &deadHost, sizeof(unsigned int));
1637 freeSockWithLock(transPrefetchSockPool,leader,sd);
1641 printf("%s -> Finished!\n",__func__);
1646 /* Commit info for objects modified */
// Validates one modified object (oid at the given version) against the main
// object store and records the result for the commit vote.
//   getReplyCtrl   - set to TRANS_DISAGREE on a version mismatch
//   oidnotfound / numoidnotfound - list and count of oids not in the store
//   oidlocked / numoidlocked     - list and count of oids locked by this call
//   v_nomatch, v_matchlock, v_matchnolock - counters; their updates are on
//       lines not shown in this listing (presumably one per branch - confirm)
// The oid is appended to oidlocked whenever write_trylock succeeds, including
// the version-mismatch branch, so the caller can release the lock later.
1647 void commitCountForObjMod(char *getReplyCtrl, unsigned int *oidnotfound, unsigned int *oidlocked, int *numoidnotfound,
1648 int *numoidlocked, int *v_nomatch, int *v_matchlock, int *v_matchnolock, unsigned int oid, unsigned short version) {
1650 /* Check if object is still present in the machine since the beginning of TRANS_REQUEST */
1651 /* Save the oids not found and number of oids not found for later use */
1652 if ((mobj = mhashSearch(oid)) == NULL) { /* Obj not found */
1653 /* Save the oids not found and number of oids not found for later use */
1654 oidnotfound[*numoidnotfound] = oid;
1655 (*numoidnotfound)++;
1656 } else { /* If Obj found in machine (i.e. has not moved) */
1657 /* Check if Obj is locked by any previous transaction */
1658 if (write_trylock(STATUSPTR(mobj))) { // Can acquire write lock
1659 if (version == ((objheader_t *)mobj)->version) { /* match versions */
1661 //Keep track of what is locked
1662 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1663 } else { /* If versions don't match ...HARD ABORT */
1665 /* Send TRANS_DISAGREE to Coordinator */
1666 *getReplyCtrl = TRANS_DISAGREE;
1668 //Keep track of what is locked
1669 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1670 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
1673 } else { //A lock is acquired some place else
1674 if (version == ((objheader_t *)mobj)->version) { /* Check if versions match */
1676 } else { /* If versions don't match ...HARD ABORT */
1678 /* Send TRANS_DISAGREE to Coordinator */
1679 *getReplyCtrl = TRANS_DISAGREE;
1680 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
1687 /* Commit info for objects read */
// Validates one object that the transaction only read (oid at the given
// version). The structure mirrors commitCountForObjMod but takes a read lock
// with read_trylock instead of a write lock.
//   getReplyCtrl   - set to TRANS_DISAGREE on a version mismatch
//   oidnotfound / numoidnotfound - list and count of oids not in the store
//   oidlocked / numoidlocked     - list and count of oids locked by this call
//   v_nomatch, v_matchlock, v_matchnolock - counters; their updates are on
//       lines not shown in this listing (presumably one per branch - confirm)
1688 void commitCountForObjRead(char *getReplyCtrl, unsigned int *oidnotfound, unsigned int *oidlocked, int *numoidnotfound,
1689 int *numoidlocked, int *v_nomatch, int *v_matchlock, int *v_matchnolock, unsigned int oid, unsigned short version) {
1691 /* Check if object is still present in the machine since the beginning of TRANS_REQUEST */
1692 /* Save the oids not found and number of oids not found for later use */
1693 if ((mobj = mhashSearch(oid)) == NULL) { /* Obj not found */
1694 /* Save the oids not found and number of oids not found for later use */
1695 oidnotfound[*numoidnotfound] = oid;
1696 (*numoidnotfound)++;
1697 } else { /* If Obj found in machine (i.e. has not moved) */
1698 /* Check if Obj is locked by any previous transaction */
1699 if (read_trylock(STATUSPTR(mobj))) { // Can further acquire read locks
1700 if (version == ((objheader_t *)mobj)->version) { /* If locked then match versions */
1702 //Keep track of what is locked
1703 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1704 } else { /* If versions don't match ...HARD ABORT */
1706 /* Send TRANS_DISAGREE to Coordinator */
1707 *getReplyCtrl = TRANS_DISAGREE;
1708 //Keep track of what is locked
1709 oidlocked[(*numoidlocked)++] = OID(((objheader_t *)mobj));
1710 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
1713 } else { //Has reached max number of readers or some other transaction
1714 //has acquired a lock on this object
1715 if (version == ((objheader_t *)mobj)->version) { /* Check if versions match */
1717 } else { /* If versions don't match ...HARD ABORT */
1719 /* Send TRANS_DISAGREE to Coordinator */
1720 *getReplyCtrl = TRANS_DISAGREE;
1721 //printf("%s() oid = %d, type = %d\t", __func__, OID(mobj), TYPE((objheader_t *)mobj));
1728 /* This function completes the ABORT process if the transaction is aborting */
// Releases the locks recorded in transinfo->objlocked[0..numlocked-1].
// Each oid is looked up with mhashSearch and unlocked with read_unlock while
// useWriteUnlock is 0, and with write_unlock otherwise.
// NOTE(review): an entry equal to -1 is treated specially; the body of that
// branch is not shown in this listing (presumably it is a separator that
// switches useWriteUnlock on - confirm). Return statements are not shown.
1729 int transAbortProcess(trans_commit_data_t *transinfo) {
1731 unsigned int *objlocked;
1734 numlocked = transinfo->numlocked;
1735 objlocked = transinfo->objlocked;
1737 int useWriteUnlock = 0;
1738 for (i = 0; i < numlocked; i++) {
// objlocked is unsigned, so this compares against the all-ones value.
1739 if(objlocked[i] == -1) {
1743 if((header = mhashSearch(objlocked[i])) == NULL) {
1744 printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1747 if(!useWriteUnlock) {
1748 read_unlock(STATUSPTR(header));
1750 write_unlock(STATUSPTR(header));
1757 /*This function completes the COMMIT process if the transaction is committing*/
// Applies a committed transaction to the main object store.
//   tdata     - supplies nummod / oidmod (modified oids) and numcreated /
//               oidcreated (oids created inside the transaction)
//   transinfo - supplies numlocked / objlocked (locks to release at the end)
// Visible steps:
//   1. For every modified oid, copy the cached-code and cached-hash fields and
//      the object payload from the transaction-cache copy over the store copy,
//      bump the version and notify threads registered in notifylist.
//   2. For every created oid, bump the version, allocate room in mainobjstore
//      under mainobjstore_mutex, initialise its locks, copy the object in and
//      register it with mhashInsert and lhashInsert.
//   3. Unlock every oid in objlocked, using read_unlock while useWriteUnlock
//      is 0 and write_unlock otherwise.
// NOTE(review): this listing omits lines of the function (error returns,
// closing braces, any preprocessor conditionals, the declaration of
// ptrcreate), so return values are not documented here.
1758 int transComProcess(trans_req_data_t *tdata, trans_commit_data_t *transinfo) {
1759 objheader_t *header, *tcptr;
1760 int i, nummod, tmpsize, numcreated, numlocked;
1761 unsigned int *oidmod, *oidcreated, *oidlocked;
1764 printf("%s-> Entering transComProcess, trans.c\n", __func__);
1767 nummod = tdata->f.nummod;
1768 oidmod = tdata->oidmod;
1769 numcreated = tdata->f.numcreated;
1770 oidcreated = tdata->oidcreated;
1771 numlocked = transinfo->numlocked;
1772 oidlocked = transinfo->objlocked;
1775 printf("%s-> nummod: %d, numcreated: %d, numlocked: %d\n", __func__, nummod, numcreated, numlocked);
// Step 1: write modified objects back into the main store.
1778 for (i = 0; i < nummod; i++) {
1779 if((header = (objheader_t *) mhashSearch(oidmod[i])) == NULL) {
1780 printf("Error: transComProcess() mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1783 /* Copy from transaction cache -> main object store */
1784 if ((tcptr = ((objheader_t *) t_chashSearch(oidmod[i]))) == NULL) {
1785 printf("Error: transComProcess() chashSearch returned NULL at %s, %d\n", __FILE__, __LINE__);
1788 GETSIZE(tmpsize, header);
1789 char *tmptcptr = (char *) tcptr;
// dst and src point just past the object header of the store copy and the
// transaction-cache copy respectively.
1791 struct ___Object___ *dst=(struct ___Object___*)((char*)header+sizeof(objheader_t));
1792 struct ___Object___ *src=(struct ___Object___*)((char*)tmptcptr+sizeof(objheader_t));
1793 dst->___cachedCode___=src->___cachedCode___;
1794 dst->___cachedHash___=src->___cachedHash___;
// Copy everything that follows the leading struct ___Object___ part.
1796 memcpy(&dst[1], &src[1], tmpsize-sizeof(struct ___Object___));
1799 header->version += 1;
1800 //printf("oid: %u, new header version: %d\n", oidmod[i], header->version);
1801 if(header->notifylist != NULL) {
// Fixed: notifylist is a pointer (it is compared with NULL above); printing it
// with %d is undefined behavior, so it is printed with %p as a void pointer.
1803 printf("%s -> type : %d notifylist : %p\n",__func__,TYPE(header),(void *)header->notifylist);
1806 if(header->isBackup != 0)
1807 notifyAll(&header->notifylist, OID(header), header->version);
1809 clearNotifyList(OID(header));
1811 notifyAll(&header->notifylist, OID(header), header->version);
1815 /* If object is newly created inside transaction then commit it */
1816 for (i = 0; i < numcreated; i++) {
1817 if ((header = ((objheader_t *) t_chashSearch(oidcreated[i]))) == NULL) {
1818 printf("Error: transComProcess() chashSearch returned NULL for oid = %x at %s, %d\n", oidcreated[i], __FILE__, __LINE__);
1821 header->version += 1;
1822 //printf("oid: %u, new header version: %d\n", oidcreated[i], header->version);
1823 GETSIZE(tmpsize, header);
1824 tmpsize += sizeof(objheader_t);
// The allocation from the shared store is serialised by mainobjstore_mutex.
1825 pthread_mutex_lock(&mainobjstore_mutex);
1826 if ((ptrcreate = objstrAlloc(&mainobjstore, tmpsize)) == NULL) {
1827 printf("Error: transComProcess() failed objstrAlloc %s, %d\n", __FILE__, __LINE__);
1828 pthread_mutex_unlock(&mainobjstore_mutex);
1831 pthread_mutex_unlock(&mainobjstore_mutex);
1832 /* Initialize read and write locks */
1833 initdsmlocks(STATUSPTR(header));
1834 memcpy(ptrcreate, header, tmpsize);
1835 mhashInsert(oidcreated[i], ptrcreate);
1836 lhashInsert(oidcreated[i], myIpAddr);
1838 /* Unlock locked objects */
1839 int useWriteUnlock = 0;
1840 for(i = 0; i < numlocked; i++) {
// oidlocked is unsigned, so this compares against the all-ones value.
1841 if(oidlocked[i] == -1) {
1845 if((header = (objheader_t *) mhashSearch(oidlocked[i])) == NULL) {
1846 printf("mhashsearch returns NULL at %s, %d\n", __FILE__, __LINE__);
1849 if(!useWriteUnlock) {
1850 read_unlock(STATUSPTR(header));
1852 write_unlock(STATUSPTR(header));
// Splits one prefetch request into the part that can be resolved locally and
// the part that must be requested from other machines.
// ptr is decoded with the GET_* macros into a site id, a tuple count, an array
// of root oids, per-tuple end offsets and a flat array of field offsets.
// For each tuple the field chain is followed with lookupObject for as long as
// it resolves. A chain that resolves completely and whose final oid passes
// checkoid counts as local; otherwise the unresolved remainder is added with
// insertPile to the pile of the machine that lhashSearch reports for the oid.
// NOTE(review): this listing omits lines of the function (declarations of i,
// newbase, numLocal and machinenum, loop exits, the return statement);
// presumably head is returned - confirm.
1858 prefetchpile_t *foundLocal(char *ptr) {
1859 int siteid = *(GET_SITEID(ptr));
1860 int ntuples = *(GET_NTUPLES(ptr));
1861 unsigned int * oidarray = GET_PTR_OID(ptr);
1862 unsigned short * endoffsets = GET_PTR_EOFF(ptr, ntuples);
1863 short * arryfields = GET_PTR_ARRYFLD(ptr, ntuples);
1864 prefetchpile_t * head=NULL;
1868 for(i=0; i<ntuples; i++) {
// The offsets of tuple i occupy arryfields[baseindex .. endindex-1].
1869 unsigned short baseindex=(i==0) ? 0 : endoffsets[i-1];
1870 unsigned short endindex=endoffsets[i];
1871 unsigned int oid=oidarray[i];
1876 //Look up fields locally
1877 for(newbase=baseindex; newbase<endindex; newbase++) {
1878 if (!lookupObject(&oid, arryfields[newbase]))
1880 //Ended in a null pointer...
1884 //Entire prefetch is local
1885 if (newbase==endindex&&checkoid(oid)) {
1889 //Add to remote requests
1890 machinenum=lhashSearch(oid);
// Only the offsets that were not resolved locally are forwarded.
1891 insertPile(machinenum, oid, endindex-newbase, &arryfields[newbase], &head);
1896 /* handle dynamic prefetching */
1897 handleDynPrefetching(numLocal, ntuples, siteid);
// Looks oid up first in the main object store (mhashSearch) and then in the
// prefetch cache (prehashSearch).
// NOTE(review): the branch bodies and return statements are not shown in this
// listing; presumably a non-zero value means the object is available locally -
// confirm.
1901 int checkoid(unsigned int oid) {
1902 objheader_t *header;
1903 if ((header=mhashSearch(oid))!=NULL) {
1906 } else if ((header=prehashSearch(oid))!=NULL) {
// Follows one reference out of the object *oid and stores the referenced oid
// back into *oid.
// The object is searched in the main store and then in the prefetch cache.
// When TYPE(header) >= NUMCLASSES the object is treated as an array: offset is
// an element index, checked against ___length___, and the element is read at
// elementsize * offset past the ArrayObject header. Otherwise offset is a byte
// offset from the end of the object header.
// NOTE(review): return statements are not shown in this listing; the caller
// treats a zero result as a chain that cannot be followed locally.
1914 int lookupObject(unsigned int * oid, short offset) {
1915 objheader_t *header;
1916 if ((header=mhashSearch(*oid))!=NULL) {
1919 } else if ((header=prehashSearch(*oid))!=NULL) {
1926 if(TYPE(header) >= NUMCLASSES) {
1927 int elementsize = classsize[TYPE(header)];
1928 struct ArrayObject *ao = (struct ArrayObject *) (((char *)header) + sizeof(objheader_t));
1929 int length = ao->___length___;
1930 /* Check if array out of bounds */
1931 if(offset < 0 || offset >= length) {
1932 //if yes treat the object as found
1936 (*oid) = *((unsigned int *)(((char *)ao) + sizeof(struct ArrayObject) + (elementsize*offset)));
1939 (*oid) = *((unsigned int *)(((char *)header) + sizeof(objheader_t) + offset));
1945 /* This function is called by the thread calling transPrefetch */
// Body of the prefetch thread.
// Visible flow: take a node from the prefetch queue with gettail, let
// foundLocal reduce it to per-machine piles, and for each pile obtain a socket
// from transPrefetchSockPool and send the request with sendPrefetchReq; the
// pile list is then released with mcdealloc.
// NOTE(review): this listing omits lines of the function (the enclosing loop,
// the advance of ptr, the release of the queue node, the return value). The
// parameter t is not used in the visible lines. No check of the getSock2
// result is visible before it is passed to sendPrefetchReq.
1946 void *transPrefetch(void *t) {
1948 /* read from prefetch queue */
1949 void *node=gettail();
1950 /* Check if the tuples are found locally, if yes then reduce them further*/
1951 /* and group requests by remote machine ids by calling the makePreGroups() */
1952 prefetchpile_t *pilehead = foundLocal(node);
1954 if (pilehead!=NULL) {
1955 // Get sock from shared pool
1957 /* Send Prefetch Request */
1958 prefetchpile_t *ptr = pilehead;
1959 while(ptr != NULL) {
1960 int sd = getSock2(transPrefetchSockPool, ptr->mid);
1961 sendPrefetchReq(ptr, sd);
1965 /* Release socket */
1966 // freeSock(transPrefetchSockPool, pilehead->mid, sd);
1968 /* Deallocated pilehead */
1969 mcdealloc(pilehead);
1971 // Deallocate the prefetch queue pile node
// Variant of sendPrefetchReq that assembles the whole request in one buffer
// and sends it with a single send_data call.
// The size starts at one char plus one int and grows, per entry of
// mcpilenode->objpiles, by an int, two unsigned ints and numoffset shorts.
// The buffer starts with TRANS_PREFETCH; for each entry the visible lines
// write the oid, myIpAddr and the offset array.
// NOTE(review): this listing omits lines of the function (allocation of the
// buffer, the fields written between the control byte and the oid, buffer
// release), so the exact wire layout cannot be confirmed from here.
1976 void sendPrefetchReqnew(prefetchpile_t *mcpilenode, int sd) {
1979 int size=sizeof(char)+sizeof(int);
// First pass: compute the total message size.
1980 for(tmp=mcpilenode->objpiles; tmp!=NULL; tmp=tmp->next) {
1981 size += sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
1986 *buf=TRANS_PREFETCH;
// Second pass: serialise every entry.
1989 for(tmp=mcpilenode->objpiles; tmp!=NULL; tmp=tmp->next) {
1990 int len = sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
1993 *((unsigned int *)buf)=tmp->oid;
1994 buf+=sizeof(unsigned int);
1995 *((unsigned int *)(buf)) = myIpAddr;
1996 buf+=sizeof(unsigned int);
1997 memcpy(buf, tmp->offset, tmp->numoffset*sizeof(short));
1998 buf+=tmp->numoffset*sizeof(short);
2001 send_data(sd, buft, size);
// Sends the prefetch request for one machine pile over socket sd.
// Wire sequence visible here: a TRANS_PREFETCH control byte, then for each
// entry of mcpilenode->objpiles one message holding numoffset (int), the oid,
// myIpAddr and the numoffset short offsets, and finally the end marker endpair.
// NOTE(review): this listing omits lines of the function (declarations, the
// advance of buf after numoffset, the advance of tmp, the value of endpair).
// The existing comment calls the end marker a special char -1, yet sizeof(int)
// bytes are sent - confirm which is intended.
2005 void sendPrefetchReq(prefetchpile_t *mcpilenode, int sd) {
2010 /* Send TRANS_PREFETCH control message */
2011 control = TRANS_PREFETCH;
2012 send_data(sd, &control, sizeof(char));
2014 /* Send Oids and offsets in pairs */
2015 tmp = mcpilenode->objpiles;
2016 while(tmp != NULL) {
2017 len = sizeof(int) + sizeof(unsigned int) + sizeof(unsigned int) + ((tmp->numoffset) * sizeof(short));
// Variable-length stack buffer sized for this entry.
2018 char oidnoffset[len];
2019 char *buf=oidnoffset;
2020 *((int*)buf) = tmp->numoffset;
2022 *((unsigned int *)buf) = tmp->oid;
2023 buf+=sizeof(unsigned int);
2024 *((unsigned int *)buf) = myIpAddr;
2025 buf += sizeof(unsigned int);
2026 memcpy(buf, tmp->offset, (tmp->numoffset)*sizeof(short));
2027 send_data(sd, oidnoffset, len);
2031 /* Send a special char -1 to represent the end of sending oids + offset pair to remote machine */
2033 send_data(sd, &endpair, sizeof(int));
// Reads one prefetch reply from socket sd.
// The reply starts with an int length; the remaining length - sizeof(int)
// bytes hold a control byte, an oid and, for OBJECT_FOUND, the object itself.
// On OBJECT_FOUND the object is copied into space taken from the prefetch
// cache (allocation guarded by prefetchcache_mutex), inserted into the
// prefetch hash table when it is new or not older than the cached version,
// and waiters on pflookup.cond are woken with a broadcast.
// NOTE(review): recvbuffer is a variable-length stack array whose size comes
// straight from the network with no validation visible, and the results of
// both recv_data calls are ignored in the visible lines - confirm this is safe.
// NOTE(review): return statements are not shown in this listing.
2038 int getPrefetchResponse(int sd) {
2039 int length = 0, size = 0;
2042 void *modptr, *oldptr;
2044 recv_data((int)sd, &length, sizeof(int));
2045 size = length - sizeof(int);
2046 char recvbuffer[size];
2048 recv_data((int)sd, recvbuffer, size);
2049 control = *((char *) recvbuffer);
2050 if(control == OBJECT_FOUND) {
2051 oid = *((unsigned int *)(recvbuffer + sizeof(char)));
// From here on size is the size of the object payload only.
2052 size = size - (sizeof(char) + sizeof(unsigned int));
2053 pthread_mutex_lock(&prefetchcache_mutex);
2054 if ((modptr = prefetchobjstrAlloc(size)) == NULL) {
2055 printf("Error: objstrAlloc error for copying into prefetch cache %s, %d\n", __FILE__, __LINE__);
2056 pthread_mutex_unlock(&prefetchcache_mutex);
2059 pthread_mutex_unlock(&prefetchcache_mutex);
2060 memcpy(modptr, recvbuffer + sizeof(char) + sizeof(unsigned int), size);
2063 /* Insert the oid and its address into the prefetch hash lookup table */
2064 /* Do a version comparison if the oid exists */
2065 if((oldptr = prehashSearch(oid)) != NULL) {
2066 /* If older version then update with new object ptr */
2067 if(((objheader_t *)oldptr)->version <= ((objheader_t *)modptr)->version) {
2069 prehashInsert(oid, modptr);
2071 } else { /* Else add the object ptr to hash table*/
2072 prehashInsert(oid, modptr);
2074 /* Lock the Prefetch Cache look up table*/
2075 pthread_mutex_lock(&pflookup.lock);
2076 /* Broadcast signal on prefetch cache condition variable */
2077 pthread_cond_broadcast(&pflookup.cond);
2078 /* Unlock the Prefetch Cache look up table*/
2079 pthread_mutex_unlock(&pflookup.lock);
2080 } else if(control == OBJECT_NOT_FOUND) {
2081 oid = *((unsigned int *)(recvbuffer + sizeof(char)));
2082 /* TODO: For each object not found query DHT for new location and retrieve the object */
2083 /* Throw an error */
2084 //printf("OBJECT %x NOT FOUND.... THIS SHOULD NOT HAPPEN...TERMINATE PROGRAM\n", oid);
2087 printf("Error: in decoding the control value %d, %s, %d\n",control, __FILE__, __LINE__);
// Returns the type of object oid.
// The header is searched in the main store, then in the prefetch cache; when
// both miss, the object is fetched with a READ_REQUEST from the machine that
// lhashSearch reports, and its type is read from the received header.
// Two receive paths are visible after the size is read: one stores the object
// in the prefetch cache and inserts it with prehashInsert, the other receives
// it into a calloc buffer.
// NOTE(review): this listing omits lines of the function, so which of the two
// paths is compiled or taken cannot be told from here (possibly selected by a
// preprocessor conditional - confirm). Results of recv_data are ignored in the
// visible lines, and no check of the getSock2 result is visible.
2093 unsigned short getObjType(unsigned int oid) {
2094 objheader_t *objheader;
2095 unsigned short numoffset[] ={0};
2096 short fieldoffset[] ={};
2098 if ((objheader = (objheader_t *) mhashSearch(oid)) == NULL) {
2100 if ((objheader = (objheader_t *) prehashSearch(oid)) == NULL) {
2102 unsigned int mid = lhashSearch(oid);
2103 int sd = getSock2(transReadSockPool, mid);
// Request layout: one control byte followed by the oid.
2104 char remotereadrequest[sizeof(char)+sizeof(unsigned int)];
2105 remotereadrequest[0] = READ_REQUEST;
2106 *((unsigned int *)(&remotereadrequest[1])) = oid;
2107 send_data(sd, remotereadrequest, sizeof(remotereadrequest));
2109 /* Read response from the Participant */
2111 recv_data(sd, &control, sizeof(char));
2113 if (control==OBJECT_NOT_FOUND) {
2114 printf("Error: in %s() THIS SHOULD NOT HAPPEN.....EXIT PROGRAM\n", __func__);
2118 /* Read object if found into local cache */
2120 recv_data(sd, &size, sizeof(int));
2122 pthread_mutex_lock(&prefetchcache_mutex);
2123 if ((objheader = prefetchobjstrAlloc(size)) == NULL) {
2124 printf("Error: %s() objstrAlloc error for copying into prefetch cache %s, %d\n", __func__, __FILE__, __LINE__);
2127 pthread_mutex_unlock(&prefetchcache_mutex);
2128 recv_data(sd, objheader, size);
2129 prehashInsert(oid, objheader);
2130 return TYPE(objheader);
2133 if((buffer = calloc(1, size)) == NULL) {
2134 printf("%s() Calloc Error %s at line %d\n", __func__, __FILE__, __LINE__);
2138 recv_data(sd, buffer, size);
2139 objheader = (objheader_t *)buffer;
2140 unsigned short type = TYPE(objheader);
2149 return TYPE(objheader);
// Asks machine mid to start a thread for object oid.
// Opens a fresh TCP socket, connects to mid (converted with htonl) on
// LISTEN_PORT and sends a START_REMOTE_THREAD byte followed by the oid.
// NOTE(review): this listing omits lines of the function (error returns, the
// closing of the socket, the final return value).
2152 int startRemoteThread(unsigned int oid, unsigned int mid) {
2154 struct sockaddr_in remoteAddr;
2155 char msg[1 + sizeof(unsigned int)];
2159 if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
2160 perror("startRemoteThread():socket()");
2164 bzero(&remoteAddr, sizeof(remoteAddr));
2165 remoteAddr.sin_family = AF_INET;
2166 remoteAddr.sin_port = htons(LISTEN_PORT);
2167 remoteAddr.sin_addr.s_addr = htonl(mid);
2169 if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
2170 printf("startRemoteThread():error %d connecting to %s:%d\n", errno,
2171 inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
// Message layout: one control byte followed by the oid.
2175 msg[0] = START_REMOTE_THREAD;
2176 *((unsigned int *) &msg[1]) = oid;
2177 send_data(sock, msg, 1 + sizeof(unsigned int));
2184 //TODO: when reusing oids, make sure they are not already in use!
// Hands out object ids from a file-static counter that starts at 0xFFFFFFFF.
// A value outside the range oidMin..oidMax triggers the branch below.
// NOTE(review): the increment, the body of that branch and the return
// statement are not shown in this listing. No locking is visible around the
// static counter - confirm callers are serialised.
2185 static unsigned int id = 0xFFFFFFFF;
2186 unsigned int getNewOID(void) {
2188 if (id > oidMax || id < oidMin) {
// Hands out transaction ids from a file-static counter that starts at
// 0xFFFFFFFF. When the counter is outside transIDMin..transIDMax it restarts
// at transIDMin with the lowest bit set.
// NOTE(review): the increment and the return statement are not shown in this
// listing. No locking is visible around the static counter - confirm callers
// are serialised.
2194 static unsigned int tid = 0xFFFFFFFF;
2195 unsigned int getNewTransID(void) {
2197 if (tid > transIDMax || tid < transIDMin) {
2198 tid = (transIDMin | 1);
// Reads CONFIG_FILENAME and initialises the host tables and the oid range of
// this machine.
// Each line is cut at the first # character, split on blanks, tabs and
// newlines, and every token is parsed with inet_addr and registered with
// addHost in host byte order. Afterwards the local address is located in the
// host array, marked live, and the 32-bit oid space is divided into
// numHostsInSystem blocks; this machine owns block myIndexInHostArray, the
// last machine also gets the remainder up to 0xFFFFFFFF. The transaction id
// range is set to the same bounds.
// NOTE(review): this listing omits lines of the function (declarations, error
// returns, the closing of the file, any preprocessor conditional around the
// two getMyIpAddr calls, the final return value).
// NOTE(review): no checks of the three calloc results are visible.
2203 int processConfigFile() {
2205 const int maxLineLength = 200;
2206 char lineBuffer[maxLineLength];
2208 const char *delimiters = " \t\n";
2213 configFile = fopen(CONFIG_FILENAME, "r");
2214 if (configFile == NULL) {
2215 printf("error opening %s:\n", CONFIG_FILENAME);
2220 numHostsInSystem = 0;
2221 sizeOfHostArray = 8;
2222 hostIpAddrs = calloc(sizeOfHostArray, sizeof(unsigned int));
2224 liveHosts = calloc(sizeOfHostArray, sizeof(unsigned int));
// Two slots per host in locateObjHosts.
2225 locateObjHosts = calloc(sizeOfHostArray*2, sizeof(unsigned int));
2230 while(fgets(lineBuffer, maxLineLength, configFile) != NULL) {
// Everything after a # is a comment.
2231 commentBegin = strchr(lineBuffer, '#');
2232 if (commentBegin != NULL)
2233 *commentBegin = '\0';
2234 token = strtok(lineBuffer, delimiters);
2235 while (token != NULL) {
2236 tmpAddr = inet_addr(token);
2237 if ((int)tmpAddr == -1) {
2238 printf("error in %s: bad token:%s\n", CONFIG_FILENAME, token);
2242 addHost(htonl(tmpAddr));
2243 token = strtok(NULL, delimiters);
2249 if (numHostsInSystem < 1) {
2250 printf("error in %s: no IP Adresses found\n", CONFIG_FILENAME);
2254 myIpAddr = getMyIpAddr("en1");
2256 myIpAddr = getMyIpAddr("eth0");
2258 myIndexInHostArray = findHost(myIpAddr);
// Fixed: findHost yields -1 when the local address is not listed, and the
// store used to run before the check below, writing to liveHosts[-1].
2260 if (myIndexInHostArray != -1) { liveHosts[myIndexInHostArray] = 1; }
2261 //locateObjHosts[myIndexInHostArray] = myIpAddr;
2263 if (myIndexInHostArray == -1) {
2264 printf("error in %s: IP Address of eth0 not found\n", CONFIG_FILENAME);
2267 oidsPerBlock = (0xFFFFFFFF / numHostsInSystem) + 1;
2268 oidMin = oidsPerBlock * myIndexInHostArray;
2269 if (myIndexInHostArray == numHostsInSystem - 1)
2270 oidMax = 0xFFFFFFFF;
2272 oidMax = oidsPerBlock * (myIndexInHostArray + 1) - 1;
2274 transIDMin = oidMin;
2275 transIDMax = oidMax;
// Finds the host pair whose backup slot (odd index of locateObjHosts) holds
// mid and returns the entry of the matching primary slot (even index).
// NOTE(review): the value returned when no pair matches is not shown in this
// listing. No locking is visible here, unlike getPrimaryMachine and
// getBackupMachine.
2284 unsigned int getDuplicatedPrimaryMachine(unsigned int mid) {
2286 for(i = 0; i < numHostsInSystem; i++) {
2287 if(mid == locateObjHosts[(i*2)+1]) {
2288 return locateObjHosts[i*2];
// Reads the primary slot (even index) that locateObjHosts holds for host mid,
// with liveHosts_mutex taken around the read.
// NOTE(review): findHost is compared against -1 elsewhere in this file; if it
// returns -1 here the index becomes -2. The return statement is not shown in
// this listing (presumably pmid is returned - confirm).
2294 unsigned int getPrimaryMachine(unsigned int mid) {
2296 int pmidindex = 2*findHost(mid);
2298 pthread_mutex_lock(&liveHosts_mutex);
2299 pmid = locateObjHosts[pmidindex];
2300 pthread_mutex_unlock(&liveHosts_mutex);
// Reads the backup slot (odd index) that locateObjHosts holds for host mid,
// with liveHosts_mutex taken around the read.
// NOTE(review): findHost is compared against -1 elsewhere in this file; if it
// returns -1 here the index becomes -1. The return statement is not shown in
// this listing (presumably bmid is returned - confirm).
2304 unsigned int getBackupMachine(unsigned int mid) {
2306 int bmidindex = 2*findHost(mid)+1;
2308 pthread_mutex_lock(&liveHosts_mutex);
2309 bmid = locateObjHosts[bmidindex];
2310 pthread_mutex_unlock(&liveHosts_mutex);
// Returns the liveHosts entry for a host.
// Despite its name, mid is used here as an index into hostIpAddrs and
// liveHosts, not as an IP address. No bounds check is visible.
2314 int getStatus(int mid) {
2316 printf("%s -> host %s : %s\n",__func__,midtoIPString(hostIpAddrs[mid]),(liveHosts[mid] == 1)?"LIVE":"DEAD");
2318 return liveHosts[mid];
2323 // updates the leader's liveHostArray and locateObj
// Probes every other configured host: up to 5 attempts to obtain a socket from
// transPrefetchSockPool, then a RESPOND_LIVE request and a read of the reply,
// which is compared with LIVE. The number of live hosts counted in
// tmpNumLiveHosts is stored in numLiveHostsInSystem at the end.
// NOTE(review): this listing omits lines of the function (the updates of
// liveHosts and tmpNumLiveHosts, loop exits, the return value). The variable
// timeout is assigned but not used in the visible lines.
2324 unsigned int updateLiveHosts() {
2326 printf("%s-> Entering updateLiveHosts\n", __func__);
2328 // update everyone's list
2331 //foreach in hostipaddrs, ping -> update list of livemachines
2332 //socket connection?
2335 //liveHosts lock here
2336 int sd = 0, i, j, tmpNumLiveHosts = 0;
2337 for(i = 0; i < numHostsInSystem; i++) {
// The local machine is not probed.
2338 if(i == myIndexInHostArray)
2343 for(j = 0; j < 5; j++) { // hard define num of retries
2344 if((sd = getSockWithLock(transPrefetchSockPool, hostIpAddrs[i])) < 0) {
2346 printf("%s -> Cannot create socket connection to [%s], attempt %d\n", __func__, midtoIPString(hostIpAddrs[i]), j);
2359 char liverequest[sizeof(char)];
2360 liverequest[0] = RESPOND_LIVE;
2362 send_data(sd, &liverequest[0], sizeof(liverequest));
2365 int timeout = recv_data(sd, &response, sizeof(response));
2368 //if timeout, dead host
2369 if(response == LIVE) {
2379 freeSockWithLock(transPrefetchSockPool,hostIpAddrs[i],sd);
2383 if(liveHosts[i] == 0)
2385 printf("updateLiveHosts(): cannot make connection to machine %s\n", midtoIPString(hostIpAddrs[i]));
2388 numLiveHostsInSystem = tmpNumLiveHosts;
2390 printf("numLiveHostsInSystem:%d\n", numLiveHostsInSystem);
2392 //have updated list of live machines
2394 printf("%s-> Exiting updateLiveHosts\n", __func__);
// Walks the numHostsInSystem host slots with a counter.
// NOTE(review): the loop body and the return statement are not shown in this
// listing; presumably count is the number of live hosts and is returned -
// confirm.
2401 int getNumLiveHostsInSystem() {
2402 int count = 0, i = 0;
2403 for(; i<numHostsInSystem; i++) {
// Broadcasts the current liveHosts and locateObjHosts tables to every other
// host marked live.
// Message layout: an UPDATE_LIVE_HOSTS byte, then numHostsInSystem liveHosts
// entries, then 2 * numHostsInSystem locateObjHosts entries.
// NOTE(review): the buffer is sized with sizeof(int) and sizeof(unsigned int)
// while the stores index it with a literal 4; the two agree only where those
// types are 4 bytes wide. The stores go through cast pointers at offset 1 of a
// char array, which is unaligned.
// NOTE(review): this listing omits lines of the function (declarations, error
// handling after a socket failure, the return value).
2410 int updateLiveHostsCommit() {
2412 printf("%s -> Enter\n",__func__);
2416 char updaterequest[sizeof(char)+sizeof(int)*numHostsInSystem+sizeof(unsigned int)*(numHostsInSystem*2)];
2418 updaterequest[0] = UPDATE_LIVE_HOSTS;
2420 for(i = 0; i < numHostsInSystem; i++) {
2421 *((int *)(&updaterequest[i*4+1])) = liveHosts[i]; // clean this up later
2424 for(i = 0; i < numHostsInSystem*2; i++) {
2425 *((unsigned int *)(&updaterequest[i*4+(numHostsInSystem*4)+1])) = locateObjHosts[i]; //ditto
2428 //for each machine send data
2430 for(i = 0; i < numHostsInSystem; i++) { // hard define num of retries
// The local machine is skipped.
2431 if(i == myIndexInHostArray)
2433 if(liveHosts[i] == 1) {
2434 if((sd = getSockWithLock(transPrefetchSockPool, hostIpAddrs[i])) < 0) {
2435 printf("%s -> socket create error, attempt %d\n",__func__, i);
2438 send_data(sd, updaterequest, sizeof(updaterequest));
2439 freeSockWithLock(transPrefetchSockPool,hostIpAddrs[i],sd);
2445 printf("%s -> Finish\n",__func__);
// Fills locateObjHosts for every host i: slot 2*i and slot 2*i+1 each receive
// the address of a host found by scanning forward (modulo numHostsInSystem)
// from i + validIndex past hosts whose liveHosts entry is 0.
// NOTE(review): this listing omits the lines that change validIndex, so the
// starting point of each scan cannot be stated from here. If no host is marked
// live the visible while loops have no other exit - confirm callers guarantee
// at least one live host.
2451 void setLocateObjHosts() {
2452 int i = 0, validIndex = 0;
2454 //check num hosts even valid first
2456 for(;i < numHostsInSystem; i++) {
2458 printf("%s-> i:%d\n", __func__, i);
2461 while(liveHosts[(i+validIndex)%numHostsInSystem] == 0) {
// Primary slot for host i.
2464 locateObjHosts[i*2] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
2466 printf("%s-> locateObjHosts[%d]:%s\n", __func__, i*2, midtoIPString(locateObjHosts[(i*2)]));
2470 while(liveHosts[(i+validIndex)%numHostsInSystem] == 0) {
2474 printf("%s-> validIndex:%d, this mid is: [%s]\n", __func__, validIndex, midtoIPString(hostIpAddrs[(i+validIndex)%numHostsInSystem]));
// Backup slot for host i.
2476 locateObjHosts[(i*2)+1] = hostIpAddrs[(i+validIndex)%numHostsInSystem];
2480 printf("%s-> locateObjHosts[%d]:%s\n", __func__, i*2+1, midtoIPString(locateObjHosts[(i*2)+1]));
// Rewrites locateObjHosts after host mid is lost.
// newPrimary is the host that used mid as its backup; backupMachine is the
// backup of mid. The backup slot of newPrimary is pointed at backupMachine,
// the primary slot of mid at newPrimary, and every remaining reference to mid
// in the table is replaced (primary slots by newPrimary, backup slots by
// backupMachine).
// NOTE(review): machine ids are unsigned int elsewhere in this file but are
// held in int here. findHost may return -1 (it is checked elsewhere in this
// file), which would make the indices below negative.
2485 void setReLocateObjHosts(int mid)
2487 int mIndex = findHost(mid);
2488 int backupMachine = getBackupMachine(mid);
2489 int newPrimary = getDuplicatedPrimaryMachine(mid);
2490 int newPrimaryIndex = findHost(newPrimary);
2493 locateObjHosts[2*newPrimaryIndex+1] = backupMachine;
2494 locateObjHosts[2*mIndex] = newPrimary;
2496 /* relocate the objects of the machines already dead */
2497 for(i=0; i<numHostsInSystem *2; i+=2) {
2498 if(locateObjHosts[i] == mid)
2499 locateObjHosts[i] = newPrimary;
2500 if(locateObjHosts[i+1] == mid)
2501 locateObjHosts[i+1] = backupMachine;
// Debug dump: for every configured host prints LIVE or DEAD followed by the
// two locateObjHosts entries of that host (labelled original and backup).
// NOTE(review): the condition that selects LIVE or DEAD is on a line not shown
// in this listing.
2507 void printHostsStatus() {
2509 printf("%s-> *printing live machines and backups*\n", __func__);
2510 for(i = 0; i < numHostsInSystem; i++) {
2512 printf("%s-> [%s]: LIVE\n", __func__, midtoIPString(hostIpAddrs[i]));
2515 printf("%s-> [%s]: DEAD\n", __func__, midtoIPString(hostIpAddrs[i]));
2517 printf("%s-> original:\t[%s]\n", __func__, midtoIPString(locateObjHosts[i*2]));
2518 printf("%s-> backup:\t[%s]\n", __func__, midtoIPString(locateObjHosts[i*2+1]));
// Iterates over the numHostsInSystem host slots.
// NOTE(review): the loop body and the return statements are not shown in this
// listing; presumably the result tells whether every host is marked live -
// confirm.
2522 int allHostsLive() {
2524 for(i = 0; i < numHostsInSystem; i++) {
// Restores redundancy after machine mid is lost.
// backupMid is the backup of mid, originalMid the host that used mid as its
// backup. After setReLocateObjHosts(mid) updates the location table, two
// copies are triggered:
//   - originalMid sends its data to backupMid (done locally with
//     duplicateLocalOriginalObjects when originalMid is this machine,
//     otherwise requested with DUPLICATE_ORIGINAL);
//   - backupMid sends its data to originalMid (done locally with
//     duplicateLocalBackupObjects when backupMid is this machine, otherwise
//     requested with DUPLICATE_BACKUP).
// NOTE(review): j is declared but no assignment to it is visible before it is
// printed in the two socket-error messages - confirm it is initialised.
// NOTE(review): this listing omits lines of the function, including the rest
// of the example comment that opens below and the declarations of duperequest
// and response. The responses are only printed in the visible lines.
2531 void duplicateLostObjects(unsigned int mid){
2533 printf("%s-> Start, mid: [%s]\n", __func__, midtoIPString(mid));
2535 //this needs to be changed.
2536 unsigned int backupMid = getBackupMachine(mid); // get backup machine of dead machine
2537 unsigned int originalMid = getDuplicatedPrimaryMachine(mid); // get primary machine that used deadmachine as backup machine.
2540 printf("%s-> backupMid: [%s], ", __func__, midtoIPString(backupMid));
2541 printf("originalMid: [%s]\n", midtoIPString(originalMid));
2545 setReLocateObjHosts(mid);
2547 //connect to these machines
2548 //go through their object store copying necessary (in a transaction)
2549 //transRequestSockPool = createSockPool(transRequestSockPool, DEFAULTSOCKPOOLSIZE);
2550 int sd = 0, i, j, tmpNumLiveHosts = 0;
2552 /* duplicateLostObject example
2563 if(originalMid == myIpAddr) { // copy local machine's backup data, make it as primary data of backup machine.
2564 duplicateLocalOriginalObjects(backupMid);
2566 else if((sd = getSockWithLock(transPrefetchSockPool, originalMid)) < 0) {
2567 printf("%s -> socket create error, attempt %d\n", __func__,j);
2571 else { // if original is not local
2573 duperequest = DUPLICATE_ORIGINAL;
2574 send_data(sd, &duperequest, sizeof(char));
2576 printf("%s-> SD : %d Sent DUPLICATE_ORIGINAL request to %s\n", __func__,sd,midtoIPString(originalMid));
2578 send_data(sd, &backupMid, sizeof(unsigned int));
2581 recv_data(sd, &response, sizeof(char));
2583 printf("%s (DUPLICATE_ORIGINAL) -> Received %s\n", __func__,(response==DUPLICATION_COMPLETE)?"DUPLICATION_COMPLETE":"DUPLICATION_FAIL");
2586 freeSockWithLock(transPrefetchSockPool, originalMid, sd);
2589 if(backupMid == myIpAddr) { // copy local machine's primary data, and make it as backup data of original machine.
2590 duplicateLocalBackupObjects(originalMid);
2592 else if((sd = getSockWithLock(transPrefetchSockPool, backupMid)) < 0) {
2593 printf("updateLiveHosts(): socket create error, attempt %d\n", j);
2598 duperequest = DUPLICATE_BACKUP;
2599 send_data(sd, &duperequest, sizeof(char));
2601 printf("%s-> SD : %d Sent DUPLICATE_BACKUP request to %s\n", __func__,sd,midtoIPString(backupMid));
2603 send_data(sd, &originalMid, sizeof(unsigned int));
2606 recv_data(sd, &response, sizeof(char));
2608 printf("%s (DUPLICATE_BACKUP) -> Received %s\n", __func__,(response==DUPLICATION_COMPLETE)?"DUPLICATION_COMPLETE":"DUPLICATION_FAIL");
2611 freeSockWithLock(transPrefetchSockPool, backupMid, sd);
2615 printf("%s-> End\n", __func__);
// Sends a copy of local objects to machine mid.
// The data is gathered with mhashGetDuplicate(..., 1) into dupeptr (tempsize
// bytes), then a RECEIVE_DUPES byte and the data are sent over a socket from
// transPrefetchSockPool and a one-byte response is read and compared with
// DUPLICATION_COMPLETE.
// NOTE(review): the meaning of the second argument of mhashGetDuplicate (1
// here, 0 in duplicateLocalOriginalObjects) is not visible in this listing.
// No release of dupeptr is visible either - confirm who owns that buffer.
2619 void duplicateLocalBackupObjects(unsigned int mid) {
2622 char *dupeptr, ctrl, response;
2624 printf("%s-> Start; backup mid:%s\n", __func__, midtoIPString(mid));
2627 //copy code from dstmserver here
2628 tempsize = mhashGetDuplicate((void**)&dupeptr, 1);
2631 printf("tempsize:%d, dupeptrfirstvalue:%d\n", tempsize, *((unsigned int *)(dupeptr)));
2633 //send control and dupes after
2634 ctrl = RECEIVE_DUPES;
2635 if((sd = getSockWithLock(transPrefetchSockPool, mid)) < 0) {
2636 printf("duplicatelocalbackup: socket create error\n");
2640 printf("%s -> sd:%d, tempsize:%d, dupeptrfirstvalue:%d\n", __func__,sd, tempsize, *((unsigned int *)(dupeptr)));
2642 send_data(sd, &ctrl, sizeof(char));
2643 send_data(sd, dupeptr, tempsize);
2645 recv_data(sd, &response, sizeof(char));
2646 freeSockWithLock(transPrefetchSockPool,mid,sd);
2649 printf("%s ->response : %d - %d\n",__func__,response,DUPLICATION_COMPLETE);
2652 if(response != DUPLICATION_COMPLETE) {
2654 printf("%s -> DUPLICATION_FAIL\n",__func__);
2661 printf("%s-> End\n", __func__);
// Sends a copy of local objects to machine mid.
// Same sequence as duplicateLocalBackupObjects, but the data is gathered with
// mhashGetDuplicate(..., 0): a RECEIVE_DUPES byte and tempsize bytes from
// dupeptr are sent over a socket from transPrefetchSockPool, then a one-byte
// response is read and compared with DUPLICATION_COMPLETE.
// NOTE(review): the meaning of the second argument of mhashGetDuplicate is not
// visible in this listing. No release of dupeptr is visible either - confirm
// who owns that buffer.
2666 void duplicateLocalOriginalObjects(unsigned int mid) {
2668 char *dupeptr, ctrl, response;
2671 printf("%s-> Start\n", __func__);
2673 //copy code fom dstmserver here
2675 tempsize = mhashGetDuplicate((void**)&dupeptr, 0);
2677 //send control and dupes after
2678 ctrl = RECEIVE_DUPES;
2680 if((sd = getSockWithLock(transPrefetchSockPool, mid)) < 0) {
2681 printf("DUPLICATE_ORIGINAL: socket create error\n");
2685 printf("sd:%d, tempsize:%d, dupeptrfirstvalue:%d\n", sd, tempsize, *((unsigned int *)(dupeptr)));
2688 send_data(sd, &ctrl, sizeof(char));
2689 send_data(sd, dupeptr, tempsize);
2691 recv_data(sd, &response, sizeof(char));
2692 freeSockWithLock(transPrefetchSockPool,mid,sd);
2695 printf("%s ->response : %d - %d\n",__func__,response,DUPLICATION_COMPLETE);
2698 if(response != DUPLICATION_COMPLETE) {
2701 printf("%s -> DUPLICATION_FAIL\n",__func__);
2709 printf("%s-> End\n", __func__);
// Registers hostIp in the host tables unless findHost already knows it.
// When the tables are full (numHostsInSystem == sizeOfHostArray) each of
// hostIpAddrs, liveHosts and locateObjHosts is replaced by a zeroed array of
// twice the capacity holding the old contents. The new host is then appended:
// its address is stored in hostIpAddrs, its liveHosts entry is cleared and its
// primary slot in locateObjHosts is set to hostIp.
// NOTE(review): this listing omits lines of the function (the early return,
// the release of the old hostIpAddrs and liveHosts arrays, the increment of
// numHostsInSystem). No checks of the calloc results are visible.
2716 void addHost(unsigned int hostIp) {
2717 unsigned int *tmpArray;
2718 int *tmpliveHostsArray;
2719 unsigned int *tmplocateObjHostsArray;
2721 if (findHost(hostIp) != -1)
2724 if (numHostsInSystem == sizeOfHostArray) {
2725 tmpArray = calloc(sizeOfHostArray * 2, sizeof(unsigned int));
2726 memcpy(tmpArray, hostIpAddrs, sizeof(unsigned int) * numHostsInSystem);
2728 hostIpAddrs = tmpArray;
2731 tmpliveHostsArray = calloc(sizeOfHostArray * 2, sizeof(unsigned int));
2732 memcpy(tmpliveHostsArray, liveHosts, sizeof(unsigned int) * numHostsInSystem);
2734 liveHosts = tmpliveHostsArray;
2736 tmplocateObjHostsArray = calloc(sizeOfHostArray * 2 * 2, sizeof(unsigned int));
// Fixed: locateObjHosts holds two entries per host, so the copy must cover
// numHostsInSystem * 2 entries; copying numHostsInSystem dropped the second
// half of the table on every growth.
2737 memcpy(tmplocateObjHostsArray, locateObjHosts, sizeof(unsigned int) * numHostsInSystem * 2);
2738 free(locateObjHosts);
2739 locateObjHosts = tmplocateObjHostsArray;
2741 sizeOfHostArray *= 2;
2744 hostIpAddrs[numHostsInSystem] = hostIp;
2747 liveHosts[numHostsInSystem] = 0;
2748 locateObjHosts[numHostsInSystem*2] = hostIp;
// Linear search for hostIp in hostIpAddrs[0..numHostsInSystem-1].
// NOTE(review): the return statements are not shown in this listing; callers
// in this file compare the result with -1, so presumably the index is
// returned on a match and -1 otherwise - confirm.
2755 int findHost(unsigned int hostIp) {
2757 for (i = 0; i < numHostsInSystem; i++)
2758 if (hostIpAddrs[i] == hostIp)
// reqNotify: ask the machine(s) holding an object to notify this thread, then
// block until the notification arrives.
//
// Visible behaviour:
//   - lhashSearch(oid) yields a machine id; 0 is treated as "not found".
//     getPrimaryMachine()/getBackupMachine() map it to pmid and bmid.
//   - TCP sockets are created and connected to pmid (and, in the variant that
//     uses bsock, to bmid) on LISTEN_PORT.
//   - A notifydata_t is allocated, filled with numoid, threadid, the caller's
//     oidarry/versionarry pointers and a mutex/condition pair, and inserted
//     into the notify hash under threadid.
//   - The request is: THREAD_NOTIFY_REQUEST byte, numoid, numoid oids,
//     numoid versions, myIpAddr, threadid.
//   - With ndata->threadnotify held, the request is sent and the thread waits
//     on ndata->threadcond.
//
// NOTE(review): two alternative signatures appear back to back below; the
// directives selecting between them (and between the one-socket and
// two-socket code paths) are not shown in this listing. Presumably they are
// preprocessor conditionals -- confirm against the full file.
// NOTE(review): msg is a variable-length array sized from numoid, so a large
// numoid consumes a correspondingly large amount of stack.
// NOTE(review): threadid is a function-level static; the statement that
// advances it is not visible here, and no lock around it is visible either.
// NOTE(review): the original header comment that follows is cut off in this
// listing.
2765 /* This function sends notification request per thread waiting on object(s) whose version
2768 int reqNotify(unsigned int *oidarry, unsigned short *versionarry, unsigned int numoid, int waitmid) {
2770 int reqNotify(unsigned int *oidarry, unsigned short *versionarry, unsigned int numoid) {
2773 objheader_t *objheader;
2774 struct sockaddr_in premoteAddr;
2775 char msg[1 + numoid * (sizeof(unsigned short) + sizeof(unsigned int)) + 3 * sizeof(unsigned int)];
2778 unsigned short version;
2779 unsigned int oid,mid;
2780 static unsigned int threadid = 0;
2781 pthread_mutex_t threadnotify = PTHREAD_MUTEX_INITIALIZER; //Lock and condition var for threadjoin and notification
2782 pthread_cond_t threadcond = PTHREAD_COND_INITIALIZER;
2783 notifydata_t *ndata;
2787 struct sockaddr_in bremoteAddr;
2792 if((mid = lhashSearch(oid)) == 0) {
2793 printf("Error: %s() No such machine found for oid =%x\n",__func__, oid);
2797 int pmid = getPrimaryMachine(mid);
2798 int bmid = getBackupMachine(mid);
2804 if ((psock = socket(AF_INET, SOCK_STREAM, 0)) < 0 ||
2805 (bsock = socket(AF_INET, SOCK_STREAM, 0)) < 0 ) {
2807 if ((psock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
2809 perror("reqNotify():socket()");
2813 /* for primary machine */
2814 bzero(&premoteAddr, sizeof(premoteAddr));
2815 premoteAddr.sin_family = AF_INET;
2816 premoteAddr.sin_port = htons(LISTEN_PORT);
2817 premoteAddr.sin_addr.s_addr = htonl(pmid);
2820 /* for backup machine */
2821 bzero(&bremoteAddr, sizeof(bremoteAddr));
2822 bremoteAddr.sin_family = AF_INET;
2823 bremoteAddr.sin_port = htons(LISTEN_PORT);
2824 bremoteAddr.sin_addr.s_addr = htonl(bmid);
2826 /* Generate unique threadid */
2829 /* Save threadid, numoid, oidarray, versionarray, pthread_cond_variable for later processing */
2830 if((ndata = calloc(1, sizeof(notifydata_t))) == NULL) {
2831 printf("Calloc Error %s, %d\n", __FILE__, __LINE__);
2834 ndata->numoid = numoid;
2835 ndata->threadid = threadid;
// The caller's arrays are stored by pointer, not copied.
2836 ndata->oidarry = oidarry;
2837 ndata->versionarry = versionarry;
// NOTE(review): the condition variable and mutex are copied by struct
// assignment. POSIX leaves the behaviour of using a copy of a pthread mutex
// or condition variable undefined; initialising the members of ndata in
// place would avoid that -- confirm and fix in the full source.
2838 ndata->threadcond = threadcond;
2839 ndata->threadnotify = threadnotify;
2840 if((status = notifyhashInsert(threadid, ndata)) != 0) {
2841 printf("reqNotify(): Insert into notify hash table not successful %s, %d\n", __FILE__, __LINE__);
2846 /* Send number of oids, oidarry, version array, machine id and threadid */
2848 if ((connect(psock, (struct sockaddr *)&premoteAddr, sizeof(premoteAddr))< 0) ||
2849 (connect(bsock, (struct sockaddr *)&bremoteAddr, sizeof(bremoteAddr))< 0)) {
2851 if ((connect(psock, (struct sockaddr *)&premoteAddr, sizeof(premoteAddr))< 0)) {
2853 printf("reqNotify():error %d connecting to %s:%d\n", errno,
2854 inet_ntoa(premoteAddr.sin_addr), LISTEN_PORT);
2860 printf("%s -> Pmid = %s\n",__func__,midtoIPString(pmid));
2862 printf("%s -> Bmid = %s\n",__func__,midtoIPString(bmid));
// Message assembly. Offsets are counted from msg[1]; "size" tracks the number
// of payload bytes written so far.
2866 msg[0] = THREAD_NOTIFY_REQUEST;
2868 *((unsigned int *)(&msg[1])) = numoid;
2869 /* Send array of oids */
2870 size = sizeof(unsigned int);
2872 for(i = 0;i < numoid; i++) {
2875 printf("%s -> oid[%d] = %d\n",__func__,i,oidarry[i]);
2877 *((unsigned int *)(&msg[1] + size)) = oid;
2878 size += sizeof(unsigned int);
2881 /* Send array of version */
2882 for(i = 0;i < numoid; i++) {
2883 version = versionarry[i];
2884 *((unsigned short *)(&msg[1] + size)) = version;
2885 size += sizeof(unsigned short);
2888 *((unsigned int *)(&msg[1] + size)) = myIpAddr;
2889 size += sizeof(unsigned int);
2890 *((unsigned int *)(&msg[1] + size)) = threadid;
// Record which machine and thread id this thread is blocked on in globals.
2892 waitThreadMid = waitmid;
2893 waitThreadID = threadid;
2895 printf("%s -> This Thread is waiting for %s\n",__func__,midtoIPString(waitmid));
// Total bytes on the wire; same expression as the declared length of msg.
2899 size = 1 + numoid * (sizeof(unsigned int) + sizeof(unsigned short)) + 3 * sizeof(unsigned int);
// The mutex is taken before sending, so a reply cannot signal the condition
// before this thread has started waiting.
2900 pthread_mutex_lock(&(ndata->threadnotify));
2901 send_data(psock, msg, size);
2903 send_data(bsock, msg, size);
// NOTE(review): a single wait with no visible predicate loop; POSIX permits
// spurious wakeups from pthread_cond_wait -- confirm whether a re-check is
// needed.
2905 pthread_cond_wait(&(ndata->threadcond), &(ndata->threadnotify));
2906 pthread_mutex_unlock(&(ndata->threadnotify));
// NOTE(review): these destroy the local objects, not the copies inside ndata
// that were actually locked and waited on above.
2909 pthread_cond_destroy(&threadcond);
2910 pthread_mutex_destroy(&threadnotify);
/* threadNotify: handle a notification that object `oid` now has `version`,
 * addressed to the waiting thread registered under `tid`.
 *
 * Visible steps:
 *   1. Look up tid in the notify hash; a NULL result is reported.
 *   2. Search the oidarry of that record for oid; a miss is reported.
 *   3. Compare version with the recorded versionarry[index]; a version that
 *      is not newer is reported.
 *   4. Look up oid in the prefetch hash (the action taken on a hit is not
 *      visible in this listing).
 *   5. Signal ndata->threadcond while holding ndata->threadnotify.
 *
 * NOTE(review): the statements following each report (returns, assignments
 * to index/objIsFound) are not shown here and are not described.
 */
2921 void threadNotify(unsigned int oid, unsigned short version, unsigned int tid) {
2922 notifydata_t *ndata;
2923 int i, objIsFound = 0, index = -1;
2926 printf("%s -> oid = %d vesion = %d tid = %d\n",__func__,oid,version,tid);
2929 //Look up the tid and call the corresponding pthread_cond_signal
2930 if((ndata = notifyhashSearch(tid)) == NULL) {
2931 printf("threadnotify(): No such threadid is present %s, %d\n", __FILE__, __LINE__);
2934 for(i = 0; i < ndata->numoid; i++) {
2935 if(ndata->oidarry[i] == oid) {
2941 if(objIsFound == 0) {
2942 printf("threadNotify(): Oid not found %s, %d\n", __FILE__, __LINE__);
/* NOTE(review): version is an unsigned short, so "version >= 0" is always
 * true and the test reduces to the first comparison. */
2946 if(version <= ndata->versionarry[index] && version >= 0) {
2947 printf("threadNotify(): New version %d has not changed since last version for oid = %d, %s, %d\n", version, oid, __FILE__, __LINE__);
2951 /* Clear from prefetch cache and free thread related data structure */
2952 if((ptr = prehashSearch(oid)) != NULL) {
/* Wake the thread blocked on this record. */
2956 pthread_mutex_lock(&(ndata->threadnotify));
2957 pthread_cond_signal(&(ndata->threadcond));
2958 pthread_mutex_unlock(&(ndata->threadnotify));
2964 printf("%s -> Finished\n",__func__);
/* notifyAll: send a THREAD_NOTIFY_RESPONSE for object `oid` at `version` to
 * the waiters recorded in the list at *head.
 *
 * The loop runs while *head is non-NULL. For each entry a TCP socket is
 * created and connected to machine `mid` on LISTEN_PORT, and one message is
 * sent with this layout:
 *     byte 0            THREAD_NOTIFY_RESPONSE
 *     unsigned int      oid
 *     unsigned short    version
 *     unsigned int      ptr->threadid
 *
 * How ptr and mid are taken from the list, how the list is advanced, socket
 * cleanup and the return value are not visible in this listing.
 */
2969 int notifyAll(threadlist_t **head, unsigned int oid, unsigned int version) {
2972 struct sockaddr_in remoteAddr;
2973 char msg[1 + sizeof(unsigned short) + 2*sizeof(unsigned int)];
2974 int sock, status, size, bytesSent;
2976 printf("%s -> Entering \n",__func__);
2979 while(*head != NULL) {
2984 printf("%s -> trying to connect MID : %s\n",__func__,midtoIPString(mid));
2987 //create a socket connection to that machine
2988 if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
2989 perror("notifyAll():socket()");
2993 bzero(&remoteAddr, sizeof(remoteAddr));
2994 remoteAddr.sin_family = AF_INET;
2995 remoteAddr.sin_port = htons(LISTEN_PORT);
2996 remoteAddr.sin_addr.s_addr = htonl(mid);
2997 //send Thread Notify response and threadid to that machine
2998 if (connect(sock, (struct sockaddr *)&remoteAddr, sizeof(remoteAddr)) < 0) {
2999 printf("notifyAll():error %d connecting to %s:%d\n", errno,
3000 inet_ntoa(remoteAddr.sin_addr), LISTEN_PORT);
3005 printf("%s -> connected\n",__func__);
/* Build the message. The stores below go through cast pointers at offsets
 * inside a char buffer, so they rely on the platform tolerating unaligned
 * access. */
3007 bzero(msg, (1+sizeof(unsigned short) + 2*sizeof(unsigned int)));
3008 msg[0] = THREAD_NOTIFY_RESPONSE;
3009 *((unsigned int *)&msg[1]) = oid;
3010 size = sizeof(unsigned int);
/* NOTE(review): the version parameter is an unsigned int but is written as an
 * unsigned short, so any high-order bits are dropped on the wire. */
3011 *((unsigned short *)(&msg[1]+ size)) = version;
3012 size+= sizeof(unsigned short);
3013 *((unsigned int *)(&msg[1]+ size)) = ptr->threadid;
/* Total length: type byte + two unsigned ints + one unsigned short. */
3015 size = 1 + 2*sizeof(unsigned int) + sizeof(unsigned short);
3016 send_data(sock, msg, size);
3025 printf("%s -> End notifying MID : %s\n",__func__,midtoIPString(mid));
3034 removetransactionhash();
3036 objstrDelete(t_cache);
3040 /* This function inserts necessary information into
3041 * a machine pile data structure.
 *
 * The pile is walked as a linked list of plistnode_t nodes, looking for the
 * node whose mid equals the mid argument. In that node the object described
 * by headeraddr is recorded according to STATUS(headeraddr):
 *   - NEW:   OID stored in oidcreated[numcreated]; sum_bytes grows by
 *            sizeof(objheader_t) plus the object size from GETSIZE.
 *   - DIRTY: OID stored in oidmod[nummod]; sum_bytes grows the same way.
 *   - remaining branch: an (unsigned int oid, short version) pair is written
 *            into objread at record index numread.
 *
 * For a machine that has no node yet, pCreate(num_objs) makes one and the
 * same information is stored in its first slot.
 *
 * Before the function ends, STATUS(headeraddr) is reset to 0.
 *
 * NOTE(review): counter updates, list linking and the return value are not
 * visible in this listing and are not described here.
 */
3042 plistnode_t *pInsert(plistnode_t *pile, objheader_t *headeraddr, unsigned int mid, int num_objs) {
3043 plistnode_t *ptr, *tmp;
3044 int found = 0, offset = 0;
3047 //Add oid into a machine that is already present in the pile linked list structure
3048 while(tmp != NULL) {
3049 // printf("tmp->mid = [%s], mid = [%s]\n", midtoIPString(tmp->mid), midtoIPString(mid));
3050 if (tmp->mid == mid) {
3053 if (STATUS(headeraddr) & NEW) {
3054 tmp->oidcreated[tmp->numcreated] = OID(headeraddr);
3056 GETSIZE(tmpsize, headeraddr);
3057 tmp->sum_bytes += sizeof(objheader_t) + tmpsize;
3058 } else if (STATUS(headeraddr) & DIRTY) {
3059 tmp->oidmod[tmp->nummod] = OID(headeraddr);
3061 GETSIZE(tmpsize, headeraddr);
3062 tmp->sum_bytes += sizeof(objheader_t) + tmpsize;
3063 /* midtoIP(tmp->mid, ip);
3064 printf("pp; Redo? pile->mid: %s, oid: %d, header version: %d\n", ip, OID(headeraddr), headeraddr->version);*/
/* objread is a packed byte array of (unsigned int oid, short version)
 * records; offset is the byte position of record number numread. */
3066 offset = (sizeof(unsigned int) + sizeof(short)) * tmp->numread;
3067 *((unsigned int *)(((char *)tmp->objread) + offset))=OID(headeraddr);
3068 offset += sizeof(unsigned int);
3069 *((short *)(((char *)tmp->objread) + offset)) = headeraddr->version;
3077 //Add oid for any new machine
3080 if((ptr = pCreate(num_objs)) == NULL) {
3084 if (STATUS(headeraddr) & NEW) {
3085 ptr->oidcreated[ptr->numcreated] = OID(headeraddr);
3087 GETSIZE(tmpsize, headeraddr);
3088 ptr->sum_bytes += sizeof(objheader_t) + tmpsize;
3089 } else if (STATUS(headeraddr) & DIRTY) {
3090 ptr->oidmod[ptr->nummod] = OID(headeraddr);
3092 GETSIZE(tmpsize, headeraddr);
3093 ptr->sum_bytes += sizeof(objheader_t) + tmpsize;
/* First read record of a fresh node: oid at byte 0, version right after. */
3095 *((unsigned int *)ptr->objread)=OID(headeraddr);
3096 offset = sizeof(unsigned int);
3097 *((short *)(((char *)ptr->objread) + offset)) = headeraddr->version;
/* Clear the status flags of the object header once it has been recorded. */
3105 STATUS(headeraddr) = 0;
/* sortPiles: reorder the pile list so that the node for the local machine
 * (mid equal to myIpAddr) is placed at the end, as stated by the comment
 * inside the body.
 *
 * Visible logic: the list is walked with ptr and a trailing prev pointer that
 * starts at pileptr. Two cases are tested for a node whose mid is myIpAddr:
 * one with prev different from pileptr, where the node is unlinked via
 * prev->next = ptr->next, and one with prev equal to pileptr.
 *
 * NOTE(review): most of the body (tail lookup, re-linking, loop advance and
 * the return value) is not visible in this listing and is not described.
 */
3110 plistnode_t *sortPiles(plistnode_t *pileptr) {
3111 plistnode_t *head, *ptr, *tail;
3114 /* Get tail pointer */
3120 plistnode_t *prev = pileptr;
3121 /* Arrange local machine processing at the end of the pile list */
3122 while(ptr != NULL) {
3124 if(ptr->mid == myIpAddr && (prev != pileptr)) {
3125 prev->next = ptr->next;
3130 if((ptr->mid == myIpAddr) && (prev == pileptr)) {
3145 * Executes when the known leader has failed.
3146 * Guarantees consensus on next leader among all live hosts. */
3149 int origRound = paxosRound;
3150 origleader = leader;
3153 printf(">> Debug : Starting paxos..\n");
3157 ret = paxosPrepare(); // phase 1
3159 ret = paxosAccept(); // phase 2
3161 paxosLearn(); // phase 3
3165 // Paxos not successful; wait and retry if the new leader is not yet selected
3167 if(paxosRound != origRound)
3169 } while (ret == -1);
3172 printf("\n>> Debug : Leader : [%s]\t[%u]\n", midtoIPString(leader),leader);
3181 //int origleader = leader;
3192 printf("[Prepare]...\n");
3195 temp_v_a = myIpAddr; // if no other value is proposed, make this machine the new leader
3197 for (i = 0; i < numHostsInSystem; ++i) {
3198 control = PAXOS_PREPARE;
3202 if ((sd = getSockWithLock(transPrefetchSockPool, hostIpAddrs[i])) < 0) {
3203 printf("paxosPrepare(): socket create error\n");
3207 printf("%s-> Send PAXOS_PREPARE to mid [%s] with my_n=%d\n", __func__, midtoIPString(hostIpAddrs[i]), my_n);
3209 send_data(sd, &control, sizeof(char));
3210 send_data(sd, &my_n, sizeof(int));
3211 int timeout = recv_data(sd, &control, sizeof(char));
3212 if ((sd == -1) || (timeout < 0)) {
3214 printf("%s-> timeout to machine [%s]\n", __func__, midtoIPString(hostIpAddrs[i]));
3220 case PAXOS_PREPARE_OK:
3222 recv_data(sd, &remote_n, sizeof(int));
3223 recv_data(sd, &remote_v, sizeof(int));
3225 printf("%s-> Received PAXOS_PREPARE_OK from mindex [%d] with remote_v=%s\n", __func__, i, midtoIPString(remote_v));
3227 if(remote_v != origleader) {
3228 if (remote_n > tmp_n) {
3230 temp_v_a = remote_v;
3234 case PAXOS_PREPARE_REJECT:
3238 freeSockWithLock(transPrefetchSockPool,hostIpAddrs[i],sd);
3242 printf("%s-> cnt:%d, numLiveHostsInSystem:%d\n", __func__, cnt, numLiveHostsInSystem);
3245 if (cnt >= (numLiveHostsInSystem / 2)) { // majority of OK replies
3259 int remote_v = temp_v_a;
3262 printf("[Accept]...\n");
3264 for (i = 0; i < numHostsInSystem; ++i) {
3265 control = PAXOS_ACCEPT;
3270 if ((sd = getSockWithLock(transPrefetchSockPool, hostIpAddrs[i])) < 0) {
3271 printf("paxosAccept(): socket create error\n");
3275 send_data(sd, &control, sizeof(char));
3276 send_data(sd, &my_n, sizeof(int));
3277 send_data(sd, &remote_v, sizeof(int));
3279 int timeout = recv_data(sd, &control, sizeof(char));
3280 if ((sd == -1) || (timeout < 0)) {
3282 printf("%s-> timeout to machine [%s]\n", __func__, midtoIPString(hostIpAddrs[i]));
3288 case PAXOS_ACCEPT_OK:
3291 case PAXOS_ACCEPT_REJECT:
3295 printf(">> Debug : Accept - n_h [%d], n_a [%d], v_a [%s]\n", n_h, n_a, midtoIPString(v_a));
3297 freeSockWithLock(transPrefetchSockPool,hostIpAddrs[i],sd);
3300 if (cnt >= (numLiveHostsInSystem / 2)) {
3315 printf("[Learn]...\n");
3318 control = PAXOS_LEARN;
3320 for (i = 0; i < numHostsInSystem; ++i) {
3323 if(hostIpAddrs[i] == myIpAddr)
3328 printf("This is my leader!!!: [%s]\n", midtoIPString(leader));
3332 if ((sd = getSockWithLock(transPrefetchSockPool, hostIpAddrs[i])) < 0) {
3334 // printf("paxosLearn(): socket create error, attemp\n");
3337 send_data(sd, &control, sizeof(char));
3338 send_data(sd, &v_a, sizeof(int));
3340 freeSockWithLock(transPrefetchSockPool,hostIpAddrs[i],sd);
/* clearDeadThreadsNotification: release this machine's pending thread-notify
 * wait when the machine being waited on is no longer live.
 *
 * Visible steps: when waitThreadID is not -1, the host index of waitThreadMid
 * is looked up with findHost(). If that host has a liveHosts entry of 0, the
 * notifydata_t registered under waitThreadID is fetched, clearNotifyList() is
 * called for every oid in its oidarry, and the waiter is woken by signalling
 * ndata->threadcond under ndata->threadnotify.
 *
 * NOTE(review): waitThreadID is declared unsigned int, so the comparison with
 * -1 is effectively a comparison with UINT_MAX after conversion.
 * NOTE(review): no check of the findHost() result is visible before it is
 * used to index liveHosts; confirm that it cannot be -1 here.
 */
3347 void clearDeadThreadsNotification()
3351 printf("%s -> Entered\n",__func__);
3353 // clear all pending threadnotify requests first
3355 if(waitThreadID != -1) {
3357 printf("%s -> I was waitng for %s\n",__func__,midtoIPString(waitThreadMid));
3359 int waitThreadIndex = findHost(waitThreadMid);
3361 notifydata_t *ndata;
3363 if(liveHosts[waitThreadIndex] == 0) // the machine this thread is waiting on is dead
3365 if((ndata = (notifydata_t*)notifyhashSearch(waitThreadID)) == NULL) {
3369 for(i =0 ; i < ndata->numoid; i++) {
3370 clearNotifyList(ndata->oidarry[i]); // clear the notify list of each awaited object
/* Wake the thread that is blocked on this notification record. */
3373 pthread_mutex_lock(&(ndata->threadnotify));
3374 pthread_cond_signal(&(ndata->threadcond));
3375 pthread_mutex_unlock(&(ndata->threadnotify));
3383 printf("%s -> Finished\n",__func__);
3387 /* request the primary and the backup machines to clear
3388 thread obj's notify list.
 *
 * lhashSearch(oid) gives the machine id for the object (0 is treated as "no
 * such machine"); getPrimaryMachine()/getBackupMachine() map it to pmid and
 * bmid. A TCP socket is created and connected to each of them on LISTEN_PORT,
 * and the same message is sent on both:
 *     byte 0          CLEAR_NOTIFY_LIST
 *     unsigned int    oid
 *
 * NOTE(review): statements following the error reports, and any closing of
 * the sockets, are not visible in this listing and are not described.
 */
3389 void reqClearNotifyList(unsigned int oid)
3393 objheader_t *objheader;
3394 struct sockaddr_in premoteAddr, bremoteAddr;
3395 char msg[1 + sizeof(unsigned int)];
3397 if((mid = lhashSearch(oid)) == 0) {
3398 printf("%s -> No such machine found for oid %x\n",__func__,oid);
3402 pmid = getPrimaryMachine(mid);
3403 bmid = getBackupMachine(mid);
/* NOTE(review): because of short-circuit evaluation, bsock is never assigned
 * when the first socket() call fails. */
3405 if((psock = socket(AF_INET, SOCK_STREAM, 0)) < 0 ||
3406 (bsock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
3407 perror("clearNotifyList() : socket()");
3411 /* for primary machine */
3412 bzero(&premoteAddr, sizeof(premoteAddr));
3413 premoteAddr.sin_family = AF_INET;
3414 premoteAddr.sin_port = htons(LISTEN_PORT);
3415 premoteAddr.sin_addr.s_addr = htonl(pmid);
3417 /* for backup machine */
3418 bzero(&bremoteAddr, sizeof(bremoteAddr));
3419 bremoteAddr.sin_family = AF_INET;
3420 bremoteAddr.sin_port = htons(LISTEN_PORT);
3421 bremoteAddr.sin_addr.s_addr = htonl(bmid);
3423 /* send message to both the primary and the backup */
3424 if((connect(psock, (struct sockaddr *)&premoteAddr, sizeof(premoteAddr)) < 0) ||
3425 (connect(bsock, (struct sockaddr *)&bremoteAddr, sizeof(bremoteAddr)) < 0)) {
3426 printf("%s -> error in connecting\n",__func__);
3430 printf("%s -> Pmid = %s\n",__func__,midtoIPString(pmid));
3431 printf("%s -> Bmid = %s\n",__func__,midtoIPString(bmid));
/* The oid is stored through a cast pointer at offset 1 of a char buffer, so
 * this relies on the platform tolerating unaligned access. */
3433 msg[0] = CLEAR_NOTIFY_LIST;
3434 *((unsigned int *)(&msg[1])) = oid;
/* &msg is a pointer to the whole array; it has the same address as msg. */
3436 send_data(psock, &msg, sizeof(char) + sizeof(unsigned int));
3437 send_data(bsock, &msg, sizeof(char) + sizeof(unsigned int));
3446 int checkiftheMachineDead(unsigned int mid) {
3447 int mIndex = findHost(mid);
3448 return getStatus(mIndex);