Remove unused functions and optimize data race checking for mem* operations
author weiyu <weiyuluo1232@gmail.com>
Wed, 16 Sep 2020 20:55:15 +0000 (13:55 -0700)
committer weiyu <weiyuluo1232@gmail.com>
Wed, 16 Sep 2020 20:55:15 +0000 (13:55 -0700)
datarace.cc
datarace.h
librace.cc
mymemory.cc

index 11811a1..5a1c115 100644 (file)
@@ -125,7 +125,7 @@ void getStoreThreadAndClock(const void *address, thread_id_t * thread, modelcloc
  * @param tid2 The thread ID for the potentially-racing action
  * @return true if the current clock allows a race with the event at clock2/tid2
  */
-static bool clock_may_race(ClockVector *clock1, thread_id_t tid1,
+static inline bool clock_may_race(ClockVector *clock1, thread_id_t tid1,
                                                                                                         modelclock_t clock2, thread_id_t tid2)
 {
        return tid1 != tid2 && clock2 != 0 && clock1->getClock(tid2) <= clock2;
@@ -268,74 +268,6 @@ Exit:
        return race;
 }
 
-/** This function does race detection on a write. */
-void raceCheckWrite(thread_id_t thread, void *location)
-{
-       uint64_t *shadow = lookupAddressEntry(location);
-       uint64_t shadowval = *shadow;
-       ClockVector *currClock = get_execution()->get_cv(thread);
-       if (currClock == NULL)
-               return;
-
-       struct DataRace * race = NULL;
-       /* Do full record */
-       if (shadowval != 0 && !ISSHORTRECORD(shadowval)) {
-               race = fullRaceCheckWrite(thread, location, shadow, currClock);
-               goto Exit;
-       }
-
-       {
-               int threadid = id_to_int(thread);
-               modelclock_t ourClock = currClock->getClock(thread);
-
-               /* Thread ID is too large or clock is too large. */
-               if (threadid > MAXTHREADID || ourClock > MAXWRITEVECTOR) {
-                       expandRecord(shadow);
-                       race = fullRaceCheckWrite(thread, location, shadow, currClock);
-                       goto Exit;
-               }
-
-               {
-                       /* Check for datarace against last read. */
-                       modelclock_t readClock = READVECTOR(shadowval);
-                       thread_id_t readThread = int_to_id(RDTHREADID(shadowval));
-
-                       if (clock_may_race(currClock, thread, readClock, readThread)) {
-                               /* We have a datarace */
-                               race = reportDataRace(readThread, readClock, false, get_execution()->get_parent_action(thread), true, location);
-                               goto ShadowExit;
-                       }
-               }
-
-               {
-                       /* Check for datarace against last write. */
-                       modelclock_t writeClock = WRITEVECTOR(shadowval);
-                       thread_id_t writeThread = int_to_id(WRTHREADID(shadowval));
-
-                       if (clock_may_race(currClock, thread, writeClock, writeThread)) {
-                               /* We have a datarace */
-                               race = reportDataRace(writeThread, writeClock, true, get_execution()->get_parent_action(thread), true, location);
-                               goto ShadowExit;
-                       }
-               }
-
-ShadowExit:
-               *shadow = ENCODEOP(0, 0, threadid, ourClock);
-       }
-
-Exit:
-       if (race) {
-#ifdef REPORT_DATA_RACES
-               race->numframes=backtrace(race->backtrace, sizeof(race->backtrace)/sizeof(void*));
-               if (raceset->add(race))
-                       assert_race(race);
-               else model_free(race);
-#else
-               model_free(race);
-#endif
-       }
-}
-
 /** This function does race detection for a write on an expanded record. */
 struct DataRace * atomfullRaceCheckWrite(thread_id_t thread, const void *location, uint64_t *shadow, ClockVector *currClock)
 {
@@ -593,74 +525,6 @@ struct DataRace * fullRaceCheckRead(thread_id_t thread, const void *location, ui
        return race;
 }
 
-/** This function does race detection on a read. */
-void raceCheckRead(thread_id_t thread, const void *location)
-{
-       uint64_t *shadow = lookupAddressEntry(location);
-       uint64_t shadowval = *shadow;
-       ClockVector *currClock = get_execution()->get_cv(thread);
-       if (currClock == NULL)
-               return;
-
-       struct DataRace * race = NULL;
-
-       /* Do full record */
-       if (shadowval != 0 && !ISSHORTRECORD(shadowval)) {
-               race = fullRaceCheckRead(thread, location, shadow, currClock);
-               goto Exit;
-       }
-
-       {
-               int threadid = id_to_int(thread);
-               modelclock_t ourClock = currClock->getClock(thread);
-
-               /* Thread ID is too large or clock is too large. */
-               if (threadid > MAXTHREADID || ourClock > MAXWRITEVECTOR) {
-                       expandRecord(shadow);
-                       race = fullRaceCheckRead(thread, location, shadow, currClock);
-                       goto Exit;
-               }
-
-               /* Check for datarace against last write. */
-
-               modelclock_t writeClock = WRITEVECTOR(shadowval);
-               thread_id_t writeThread = int_to_id(WRTHREADID(shadowval));
-
-               if (clock_may_race(currClock, thread, writeClock, writeThread)) {
-                       /* We have a datarace */
-                       race = reportDataRace(writeThread, writeClock, true, get_execution()->get_parent_action(thread), false, location);
-                       goto ShadowExit;
-               }
-
-ShadowExit:
-               {
-                       modelclock_t readClock = READVECTOR(shadowval);
-                       thread_id_t readThread = int_to_id(RDTHREADID(shadowval));
-
-                       if (clock_may_race(currClock, thread, readClock, readThread)) {
-                               /* We don't subsume this read... Have to expand record. */
-                               expandRecord(shadow);
-                               fullRaceCheckRead(thread, location, shadow, currClock);
-                               goto Exit;
-                       }
-               }
-
-               *shadow = ENCODEOP(threadid, ourClock, id_to_int(writeThread), writeClock) | (shadowval & ATOMICMASK);
-       }
-Exit:
-       if (race) {
-#ifdef REPORT_DATA_RACES
-               race->numframes=backtrace(race->backtrace, sizeof(race->backtrace)/sizeof(void*));
-               if (raceset->add(race))
-                       assert_race(race);
-               else model_free(race);
-#else
-               model_free(race);
-#endif
-       }
-}
-
-
 /** This function does race detection on a read for an expanded record. */
 struct DataRace * atomfullRaceCheckRead(thread_id_t thread, const void *location, uint64_t *shadow, ClockVector *currClock)
 {
@@ -710,8 +574,6 @@ void atomraceCheckRead(thread_id_t thread, const void *location)
                        race = reportDataRace(writeThread, writeClock, true, get_execution()->get_parent_action(thread), false, location);
                        goto Exit;
                }
-
-
        }
 Exit:
        if (race) {
@@ -797,11 +659,10 @@ Exit:
        return shadow;
 }
 
-static inline void raceCheckRead_otherIt(thread_id_t thread, const void * location) {
+static inline void raceCheckRead_otherIt(thread_id_t thread, const void * location)
+{
        uint64_t *shadow = lookupAddressEntry(location);
-
        uint64_t shadowval = *shadow;
-
        ClockVector *currClock = get_execution()->get_cv(thread);
        if (currClock == NULL)
                return;
@@ -978,12 +839,10 @@ void raceCheckRead8(thread_id_t thread, const void *location)
        int old_flag = GET_MODEL_FLAG;
        ENTER_MODEL_FLAG;
 
-       uint64_t old_shadowval, new_shadowval;
-       old_shadowval = new_shadowval = INVALIDSHADOWVAL;
 #ifdef COLLECT_STAT
        load8_count++;
 #endif
-       raceCheckRead_firstIt(thread, location, &old_shadowval, &new_shadowval);
+       raceCheckRead_otherIt(thread, location);
        RESTORE_MODEL_FLAG(old_flag);
 }
 
@@ -1059,11 +918,10 @@ Exit:
        return shadow;
 }
 
-static inline void raceCheckWrite_otherIt(thread_id_t thread, const void * location) {
+static inline void raceCheckWrite_otherIt(thread_id_t thread, const void * location)
+{
        uint64_t *shadow = lookupAddressEntry(location);
-
        uint64_t shadowval = *shadow;
-
        ClockVector *currClock = get_execution()->get_cv(thread);
        if (currClock == NULL)
                return;
@@ -1242,12 +1100,170 @@ void raceCheckWrite8(thread_id_t thread, const void *location)
        int old_flag = GET_MODEL_FLAG;
        ENTER_MODEL_FLAG;
 
-       uint64_t old_shadowval, new_shadowval;
-       old_shadowval = new_shadowval = INVALIDSHADOWVAL;
 #ifdef COLLECT_STAT
        store8_count++;
 #endif
-       raceCheckWrite_firstIt(thread, location, &old_shadowval, &new_shadowval);
+       raceCheckWrite_otherIt(thread, location);
+       RESTORE_MODEL_FLAG(old_flag);
+}
+
+/**
+ * This function does race detection for a write of `size` bytes starting at
+ * `location`, as issued by the intercepted memcpy/memmove/memset/bzero.
+ * Each byte address in the range is looked up and checked individually.
+ *
+ * @param thread   The thread performing the write
+ * @param location Address of the first byte written
+ * @param size     Number of bytes written
+ *
+ * With REPORT_DATA_RACES defined, only the first race found in the range is
+ * backtraced and handed to raceset/assert_race(); races found for later bytes
+ * of the same operation are freed. Without it, every race record is freed.
+ */
+void raceCheckWriteMemop(thread_id_t thread, const void *location, size_t size)
+{
+       int old_flag = GET_MODEL_FLAG;
+       ENTER_MODEL_FLAG;
+
+       ClockVector *currClock = get_execution()->get_cv(thread);
+       if (currClock == NULL) {
+               RESTORE_MODEL_FLAG(old_flag);
+               return;
+       }
+
+       bool alreadyHasRace = false;
+       /* size_t counter: a narrower index would wrap before reaching a large size. */
+       for (size_t i = 0; i < size; i++) {
+               /* Address of the i-th byte of the range; the base address alone
+                * would re-check byte 0 on every iteration. */
+               const void *loc = (const void *)(((const char *)location) + i);
+               uint64_t *shadow = lookupAddressEntry(loc);
+               uint64_t shadowval = *shadow;
+
+               struct DataRace * race = NULL;
+               /* Do full record */
+               if (shadowval != 0 && !ISSHORTRECORD(shadowval)) {
+                       race = fullRaceCheckWrite(thread, loc, shadow, currClock);
+                       goto Exit;
+               }
+
+               {
+                       int threadid = id_to_int(thread);
+                       modelclock_t ourClock = currClock->getClock(thread);
+
+                       /* Thread ID is too large or clock is too large. */
+                       if (threadid > MAXTHREADID || ourClock > MAXWRITEVECTOR) {
+                               expandRecord(shadow);
+                               race = fullRaceCheckWrite(thread, loc, shadow, currClock);
+                               goto Exit;
+                       }
+
+                       {
+                               /* Check for datarace against last read. */
+                               modelclock_t readClock = READVECTOR(shadowval);
+                               thread_id_t readThread = int_to_id(RDTHREADID(shadowval));
+
+                               if (clock_may_race(currClock, thread, readClock, readThread)) {
+                                       /* We have a datarace */
+                                       race = reportDataRace(readThread, readClock, false, get_execution()->get_parent_action(thread), true, loc);
+                                       goto ShadowExit;
+                               }
+                       }
+
+                       {
+                               /* Check for datarace against last write. */
+                               modelclock_t writeClock = WRITEVECTOR(shadowval);
+                               thread_id_t writeThread = int_to_id(WRTHREADID(shadowval));
+
+                               if (clock_may_race(currClock, thread, writeClock, writeThread)) {
+                                       /* We have a datarace */
+                                       race = reportDataRace(writeThread, writeClock, true, get_execution()->get_parent_action(thread), true, loc);
+                                       goto ShadowExit;
+                               }
+                       }
+
+ShadowExit:
+                       /* Record this write as the byte's last write; the read slot is cleared. */
+                       *shadow = ENCODEOP(0, 0, threadid, ourClock);
+               }
+
+Exit:
+               if (race) {
+#ifdef REPORT_DATA_RACES
+                       if (!alreadyHasRace) {
+                               /* Report at most one race per mem* operation. */
+                               alreadyHasRace = true;
+                               race->numframes=backtrace(race->backtrace, sizeof(race->backtrace)/sizeof(void*));
+                               if (raceset->add(race))
+                                       assert_race(race);
+                               else model_free(race);
+                       } else {
+                               model_free(race);
+                       }
+#else
+                       model_free(race);
+#endif
+               }
+       }
+       RESTORE_MODEL_FLAG(old_flag);
+}
+
+/**
+ * This function does race detection for a read of `size` bytes starting at
+ * `location`, as issued by the intercepted memcpy/memmove.
+ * Each byte address in the range is looked up and checked individually.
+ *
+ * @param thread   The thread performing the read
+ * @param location Address of the first byte read
+ * @param size     Number of bytes read
+ *
+ * With REPORT_DATA_RACES defined, only the first race found in the range is
+ * backtraced and handed to raceset/assert_race(); races found for later bytes
+ * of the same operation are freed. Without it, every race record is freed.
+ */
+void raceCheckReadMemop(thread_id_t thread, const void * location, size_t size)
+{
+       int old_flag = GET_MODEL_FLAG;
+       ENTER_MODEL_FLAG;
+
+       ClockVector *currClock = get_execution()->get_cv(thread);
+       if (currClock == NULL) {
+               RESTORE_MODEL_FLAG(old_flag);
+               return;
+       }
+
+       bool alreadyHasRace = false;
+       /* size_t counter: a narrower index would wrap before reaching a large size. */
+       for (size_t i = 0; i < size; i++) {
+               /* Address of the i-th byte of the range; the base address alone
+                * would re-check byte 0 on every iteration. */
+               const void *loc = (const void *)(((const char *)location) + i);
+               uint64_t *shadow = lookupAddressEntry(loc);
+               uint64_t shadowval = *shadow;
+               struct DataRace * race = NULL;
+
+               /* Do full record */
+               if (shadowval != 0 && !ISSHORTRECORD(shadowval)) {
+                       race = fullRaceCheckRead(thread, loc, shadow, currClock);
+                       goto Exit;
+               }
+
+               {
+                       int threadid = id_to_int(thread);
+                       modelclock_t ourClock = currClock->getClock(thread);
+
+                       /* Thread ID is too large or clock is too large. */
+                       if (threadid > MAXTHREADID || ourClock > MAXWRITEVECTOR) {
+                               expandRecord(shadow);
+                               race = fullRaceCheckRead(thread, loc, shadow, currClock);
+                               goto Exit;
+                       }
+
+                       /* Check for datarace against last write. */
+                       modelclock_t writeClock = WRITEVECTOR(shadowval);
+                       thread_id_t writeThread = int_to_id(WRTHREADID(shadowval));
+
+                       if (clock_may_race(currClock, thread, writeClock, writeThread)) {
+                               /* We have a datarace; keep going so the shadow still records this read. */
+                               race = reportDataRace(writeThread, writeClock, true, get_execution()->get_parent_action(thread), false, loc);
+                       }
+
+                       modelclock_t readClock = READVECTOR(shadowval);
+                       thread_id_t readThread = int_to_id(RDTHREADID(shadowval));
+
+                       if (clock_may_race(currClock, thread, readClock, readThread)) {
+                               /* We don't subsume this read... Have to expand record. */
+                               expandRecord(shadow);
+                               /* NOTE(review): assumes expandRecord() keeps the previous read in
+                                * slot 0 and leaves room for a second entry -- confirm. */
+                               struct RaceRecord *record = (struct RaceRecord *) (*shadow);
+                               record->thread[1] = thread;
+                               record->readClock[1] = ourClock;
+                               record->numReads++;
+
+                               goto Exit;
+                       }
+
+                       /* This read replaces the recorded read; write slot and atomic bits are kept. */
+                       *shadow = ENCODEOP(threadid, ourClock, id_to_int(writeThread), writeClock) | (shadowval & ATOMICMASK);
+               }
+Exit:
+               if (race) {
+#ifdef REPORT_DATA_RACES
+                       if (!alreadyHasRace) {
+                               /* Report at most one race per mem* operation. */
+                               alreadyHasRace = true;
+                               race->numframes=backtrace(race->backtrace, sizeof(race->backtrace)/sizeof(void*));
+                               if (raceset->add(race))
+                                       assert_race(race);
+                               else model_free(race);
+                       } else {
+                               model_free(race);
+                       }
+#else
+                       model_free(race);
+#endif
+               }
+       }
        RESTORE_MODEL_FLAG(old_flag);
 }
 
index 34bfd40..1cdf8c7 100644 (file)
@@ -43,10 +43,7 @@ struct DataRace {
 #define MASK16BIT 0xffff
 
 void initRaceDetector();
-void raceCheckWrite(thread_id_t thread, void *location);
 void atomraceCheckWrite(thread_id_t thread, void *location);
-void raceCheckRead(thread_id_t thread, const void *location);
-
 void atomraceCheckRead(thread_id_t thread, const void *location);
 void recordWrite(thread_id_t thread, void *location);
 void recordCalloc(void *location, size_t size);
@@ -65,6 +62,9 @@ void raceCheckWrite16(thread_id_t thread, const void *location);
 void raceCheckWrite32(thread_id_t thread, const void *location);
 void raceCheckWrite64(thread_id_t thread, const void *location);
 
+void raceCheckWriteMemop(thread_id_t thread, const void *location, size_t size);
+void raceCheckReadMemop(thread_id_t thread, const void *location, size_t size);
+
 #ifdef COLLECT_STAT
 void print_normal_accesses();
 #endif
index 214026d..118e75c 100644 (file)
@@ -8,92 +8,6 @@
 #include "threads-model.h"
 #include "snapshot-interface.h"
 
-void store_8(void *addr, uint8_t val)
-{
-       DEBUG("addr = %p, val = %" PRIu8 "\n", addr, val);
-       thread_id_t tid = thread_current_id();
-       raceCheckWrite(tid, addr);
-       (*(uint8_t *)addr) = val;
-}
-
-void store_16(void *addr, uint16_t val)
-{
-       DEBUG("addr = %p, val = %" PRIu16 "\n", addr, val);
-       thread_id_t tid = thread_current_id();
-       raceCheckWrite(tid, addr);
-       raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 1));
-       (*(uint16_t *)addr) = val;
-}
-
-void store_32(void *addr, uint32_t val)
-{
-       DEBUG("addr = %p, val = %" PRIu32 "\n", addr, val);
-       thread_id_t tid = thread_current_id();
-       raceCheckWrite(tid, addr);
-       raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 1));
-       raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 2));
-       raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 3));
-       (*(uint32_t *)addr) = val;
-}
-
-void store_64(void *addr, uint64_t val)
-{
-       DEBUG("addr = %p, val = %" PRIu64 "\n", addr, val);
-       thread_id_t tid = thread_current_id();
-       raceCheckWrite(tid, addr);
-       raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 1));
-       raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 2));
-       raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 3));
-       raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 4));
-       raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 5));
-       raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 6));
-       raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 7));
-       (*(uint64_t *)addr) = val;
-}
-
-uint8_t load_8(const void *addr)
-{
-       DEBUG("addr = %p\n", addr);
-       thread_id_t tid = thread_current_id();
-       raceCheckRead(tid, addr);
-       return *((uint8_t *)addr);
-}
-
-uint16_t load_16(const void *addr)
-{
-       DEBUG("addr = %p\n", addr);
-       thread_id_t tid = thread_current_id();
-       raceCheckRead(tid, addr);
-       raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 1));
-       return *((uint16_t *)addr);
-}
-
-uint32_t load_32(const void *addr)
-{
-       DEBUG("addr = %p\n", addr);
-       thread_id_t tid = thread_current_id();
-       raceCheckRead(tid, addr);
-       raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 1));
-       raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 2));
-       raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 3));
-       return *((uint32_t *)addr);
-}
-
-uint64_t load_64(const void *addr)
-{
-       DEBUG("addr = %p\n", addr);
-       thread_id_t tid = thread_current_id();
-       raceCheckRead(tid, addr);
-       raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 1));
-       raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 2));
-       raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 3));
-       raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 4));
-       raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 5));
-       raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 6));
-       raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 7));
-       return *((uint64_t *)addr);
-}
-
 /**
  * Helper functions used by CDSPass
  * The CDSPass implementation does not replace normal load/stores with cds load/stores,
@@ -138,6 +52,7 @@ void cds_store64(void *addr)
 }
 
 void cds_load8(const void *addr) {
+       DEBUG("addr = %p\n", addr);
        if (!model)
                return;
        thread_id_t tid = thread_current_id();
@@ -145,6 +60,7 @@ void cds_load8(const void *addr) {
 }
 
 void cds_load16(const void *addr) {
+       DEBUG("addr = %p\n", addr);
        if (!model)
                return;
        thread_id_t tid = thread_current_id();
@@ -152,6 +68,7 @@ void cds_load16(const void *addr) {
 }
 
 void cds_load32(const void *addr) {
+       DEBUG("addr = %p\n", addr);
        if (!model)
                return;
        thread_id_t tid = thread_current_id();
@@ -159,6 +76,7 @@ void cds_load32(const void *addr) {
 }
 
 void cds_load64(const void *addr) {
+       DEBUG("addr = %p\n", addr);
        if (!model)
                return;
        thread_id_t tid = thread_current_id();
index 20c2962..3d5168f 100644 (file)
@@ -138,199 +138,63 @@ void init_memory_ops()
 
 void * memcpy(void * dst, const void * src, size_t n) {
        if (model && !inside_model) {
-               //model_print("memcpy intercepted\n");
+               //model_print("memcpy size: %d\n", n);
                thread_id_t tid = thread_current_id();
-               if (((uintptr_t)src&7) == 0 && ((uintptr_t)dst&7) == 0 && (n&7) == 0) {
-                       for (uint i = 0; i < (n>>3); i++) {
-                               raceCheckRead64(tid, (void *)(((uint64_t *)src) + i));
-                               ((volatile uint64_t *)dst)[i] = ((uint64_t *)src)[i];
-                               raceCheckWrite64(tid, (void *)(((uint64_t *)dst) + i));
-                       }
-               } else if (((uintptr_t)src&3) == 0 && ((uintptr_t)dst&3) == 0 && (n&3) == 0) {
-                       for (uint i = 0; i < (n>>2); i++) {
-                               raceCheckRead32(tid, (void *)(((uint32_t *)src) + i));
-                               ((volatile uint32_t *)dst)[i] = ((uint32_t *)src)[i];
-                               raceCheckWrite32(tid, (void *)(((uint32_t *)dst) + i));
-                       }
-               } else if (((uintptr_t)src&1) == 0 && ((uintptr_t)dst&1) == 0 && (n&1) == 0) {
-                       for (uint i = 0; i < (n>>1); i++) {
-                               raceCheckRead16(tid, (void *)(((uint16_t *)src) + i));
-                               ((volatile uint16_t *)dst)[i] = ((uint16_t *)src)[i];
-                               raceCheckWrite16(tid, (void *)(((uint16_t *)dst) + i));
-                       }
-               } else {
-                       for(uint i=0;i<n;i++) {
-                               raceCheckRead8(tid, (void *)(((char *)src) + i));
-                               ((volatile char *)dst)[i] = ((char *)src)[i];
-                               raceCheckWrite8(tid, (void *)(((char *)dst) + i));
-                       }
+               raceCheckReadMemop(tid, (void *)src, n);
+               raceCheckWriteMemop(tid, (void *)dst, n);
+       } else if (((uintptr_t)real_memcpy) < 2) {
+               for(uint i=0;i<n;i++) {
+                       ((volatile char *)dst)[i] = ((char *)src)[i];
                }
-       } else {
-               if (((uintptr_t)real_memcpy) < 2) {
-                       for(uint i=0;i<n;i++) {
-                               ((volatile char *)dst)[i] = ((char *)src)[i];
-                       }
-                       return dst;
-               }
-
-               return real_memcpy(dst, src, n);
+               return dst;
        }
-       return dst;
+       return real_memcpy(dst, src, n);
 }
 
 void * memmove(void * dst, const void * src, size_t n) {
        if (model && !inside_model) {
                thread_id_t tid = thread_current_id();
-               if (((uintptr_t)src&7) == 0 && ((uintptr_t)dst&7) == 0 && (n&7) == 0) {
-                       if (((uintptr_t)dst) < ((uintptr_t)src))
-                               for (uint i = 0; i < (n>>3); i++) {
-                                       raceCheckRead64(tid, (void *)(((uint64_t *)src) + i));
-                                       ((volatile uint64_t *)dst)[i] = ((uint64_t *)src)[i];
-                                       raceCheckWrite64(tid, (void *)(((uint64_t *)dst) + i));
-                               }
-                       else
-                               for (uint i = (n>>3); i != 0;) {
-                                       i--;
-                                       raceCheckRead64(tid, (void *)(((uint64_t *)src) + i));
-                                       ((volatile uint64_t *)dst)[i] = ((uint64_t *)src)[i];
-                                       raceCheckWrite64(tid, (void *)(((uint64_t *)dst) + i));
-                               }
-               } else if (((uintptr_t)src&3) == 0 && ((uintptr_t)dst&3) == 0 && (n&3) == 0) {
-                       if (((uintptr_t)dst) < ((uintptr_t)src))
-                               for (uint i = 0; i < (n>>2); i++) {
-                                       raceCheckRead32(tid, (void *)(((uint32_t *)src) + i));
-                                       ((volatile uint32_t *)dst)[i] = ((uint32_t *)src)[i];
-                                       raceCheckWrite32(tid, (void *)(((uint32_t *)dst) + i));
-                               }
-                       else
-                               for (uint i = (n>>2); i != 0;) {
-                                       i--;
-                                       raceCheckRead32(tid, (void *)(((uint32_t *)src) + i));
-                                       ((volatile uint32_t *)dst)[i] = ((uint32_t *)src)[i];
-                                       raceCheckWrite32(tid, (void *)(((uint32_t *)dst) + i));
-                               }
-               } else if (((uintptr_t)src&1) == 0 && ((uintptr_t)dst&1) == 0 && (n&1) == 0) {
-                       if (((uintptr_t)dst) < ((uintptr_t)src))
-                               for (uint i = 0; i < (n>>1); i++) {
-                                       raceCheckRead16(tid, (void *)(((uint16_t *)src) + i));
-                                       ((volatile uint16_t *)dst)[i] = ((uint16_t *)src)[i];
-                                       raceCheckWrite16(tid, (void *)(((uint16_t *)dst) + i));
-                               }
-                       else
-                               for (uint i = (n>>1); i != 0;) {
-                                       i--;
-                                       raceCheckRead16(tid, (void *)(((uint16_t *)src) + i));
-                                       ((volatile uint16_t *)dst)[i] = ((uint16_t *)src)[i];
-                                       raceCheckWrite16(tid, (void *)(((uint16_t *)dst) + i));
-                               }
-               } else {
-                       if (((uintptr_t)dst) < ((uintptr_t)src))
-                               for(uint i = 0; i < n; i++) {
-                                       raceCheckRead8(tid, (void *)(((char *)src) + i));
-                                       ((volatile char *)dst)[i] = ((char *)src)[i];
-                                       raceCheckWrite8(tid, (void *)(((char *)dst) + i));
-                               }
-                       else
-                               for(uint i = n; i != 0;) {
-                                       i--;
-                                       raceCheckRead8(tid, (void *)(((char *)src) + i));
-                                       ((volatile char *)dst)[i] = ((char *)src)[i];
-                                       raceCheckWrite8(tid, (void *)(((char *)dst) + i));
-                               }
-               }
-       } else {
-               if (((uintptr_t)real_memmove) < 2) {
-                       if (((uintptr_t)dst) < ((uintptr_t)src))
-                               for(uint i=0;i<n;i++) {
-                                       ((volatile char *)dst)[i] = ((char *)src)[i];
-                               }
-                       else
-                               for(uint i=n;i!=0; ) {
-                                       i--;
-                                       ((volatile char *)dst)[i] = ((char *)src)[i];
-                               }
-                       return dst;
-               }
-        return real_memmove(dst, src, n);
+               raceCheckReadMemop(tid, (void *)src, n);
+               raceCheckWriteMemop(tid, (void *)dst, n);
+       } else if (((uintptr_t)real_memmove) < 2) {
+               if (((uintptr_t)dst) < ((uintptr_t)src))
+                       for(uint i=0;i<n;i++) {
+                               ((volatile char *)dst)[i] = ((char *)src)[i];
+                       }
+               else
+                       for(uint i=n;i!=0; ) {
+                               i--;
+                               ((volatile char *)dst)[i] = ((char *)src)[i];
+                       }
+               return dst;
        }
-       return dst;
+       return real_memmove(dst, src, n);
 }
 
 void * memset(void *dst, int c, size_t n) {
        if (model && !inside_model) {
+               //model_print("memset size: %d\n", n);
                thread_id_t tid = thread_current_id();
-               uint8_t cs = c&0xff;
-               if (((uintptr_t)dst&7) == 0 && (n&7) == 0) {
-                       for (uint i = 0; i < (n>>3); i++) {
-                   uint16_t cs2 = cs << 8 | cs;
-                   uint64_t cs3 = cs2 << 16 | cs2;
-                   uint64_t cs4 = cs3 << 32 | cs3;
-                               ((volatile uint64_t *)dst)[i] = cs4;
-                               raceCheckWrite64(tid, (void *)(((uint64_t *)dst) + i));
-                       }
-               } else if (((uintptr_t)dst&3) == 0 && (n&3) == 0) {
-                       for (uint i = 0; i < (n>>2); i++) {
-                   uint16_t cs2 = cs << 8 | cs;
-                   uint32_t cs3 = cs2 << 16 | cs2;
-                               ((volatile uint32_t *)dst)[i] = cs3;
-                               raceCheckWrite32(tid, (void *)(((uint32_t *)dst) + i));
-                       }
-               } else if (((uintptr_t)dst&1) == 0 && (n&1) == 0) {
-                       for (uint i = 0; i < (n>>1); i++) {
-                   uint16_t cs2 = cs << 8 | cs;
-                               ((volatile uint16_t *)dst)[i] = cs2;
-                               raceCheckWrite16(tid, (void *)(((uint16_t *)dst) + i));
-                       }
-               } else {
-                       for (uint i=0;i<n;i++) {
-                               ((volatile char *)dst)[i] = cs;
-                               raceCheckWrite8(tid, (void *)(((char *)dst) + i));
-                       }
+               raceCheckWriteMemop(tid, (void *)dst, n);
+       } else if (((uintptr_t)real_memset) < 2) {
+               //stuck in dynamic linker alloc cycle...
+               for(size_t s=0;s<n;s++) {
+                       ((volatile char *)dst)[s] = (char) c;
                }
-       } else {
-               if (((uintptr_t)real_memset) < 2) {
-                       //stuck in dynamic linker alloc cycle...
-                       for(size_t s=0;s<n;s++) {
-                               ((volatile char *)dst)[s] = (char) c;
-                       }
-                       return dst;
-               }
-               return real_memset(dst, c, n);
+               return dst;
        }
-       return dst;
+       return real_memset(dst, c, n);
 }
 
 void bzero(void *dst, size_t n) {
        if (model && !inside_model) {
                thread_id_t tid = thread_current_id();
-               if (((uintptr_t)dst&7) == 0 && (n&7) == 0) {
-                       for (uint i = 0; i < (n>>3); i++) {
-                               ((volatile uint64_t *)dst)[i] = 0;
-                               raceCheckWrite64(tid, (void *)(((uint64_t *)dst) + i));
-                       }
-               } else if (((uintptr_t)dst&3) == 0 && (n&3) == 0) {
-                       for (uint i = 0; i < (n>>2); i++) {
-                               ((volatile uint32_t *)dst)[i] = 0;
-                               raceCheckWrite32(tid, (void *)(((uint32_t *)dst) + i));
-                       }
-               } else if (((uintptr_t)dst&1) == 0 && (n&1) == 0) {
-                       for (uint i = 0; i < (n>>1); i++) {
-                               ((volatile uint16_t *)dst)[i] = 0;
-                               raceCheckWrite16(tid, (void *)(((uint16_t *)dst) + i));
-                       }
-               } else {
-                       for (uint i=0;i<n;i++) {
-                               ((volatile char *)dst)[i] = 0;
-                               raceCheckWrite8(tid, (void *)(((char *)dst) + i));
-                       }
-               }
-       } else {
-               if (((uintptr_t)real_bzero) < 2) {
-                       for(size_t s=0;s<n;s++) {
-                               ((volatile char *)dst)[s] = 0;
-                       }
-                       return;
+               raceCheckWriteMemop(tid, (void *)dst, n);
+       } else if (((uintptr_t)real_bzero) < 2) {
+               for(size_t s=0;s<n;s++) {
+                       ((volatile char *)dst)[s] = 0;
                }
-               real_bzero(dst, n);
+               return;
        }
+       real_bzero(dst, n);
 }