#ifdef MULTICORE_GC
+#include "structdefs.h"
#include "multicoregccompact.h"
#include "runtime_arch.h"
#include "multicoreruntime.h"
#include "multicoregarbage.h"
+#include "markbit.h"
+#include "multicoremem_helper.h"
+#include "gcqueue.h"
-INLINE bool gc_checkCoreStatus() {
- BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
- for(int i = 0; i < NUMCORES4GC; ++i) {
- if(gccorestatus[i] != 0) {
- BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
- return false;
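+//Count the cores that have not yet returned their extra memory to the
+//master core (returnedmem[i] is cleared in handleReturnMem_I)...these
+//are the cores that are still compacting.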
+int gc_countRunningCores() {
+ int count=0;
+ for(int i = 0; i < NUMCORES4GC; i++) {
+ if(returnedmem[i]) {
+ count++;
}
- }
- BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
- return true;
+ }
+ return count;
}
-INLINE void gc_resetCoreStatus() {
- BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
- for(int i = 0; i < NUMCORES4GC; ++i) {
- gccorestatus[i] = 1;
- }
- BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
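+//Set up the compaction iterators: both the source (orig) and the
+//destination (to) start at this core's local block 0.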
+void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
+ // init the dst ptr
+ to->localblocknum = 0;
+ BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->localblocknum);
+ to->ptr = to->base;
+ to->bound=to->base+BLOCKSIZE(to->localblocknum);
+
+ // init the orig ptr
+ orig->localblocknum = 0;
+ orig->ptr=orig->base = to->base;
+ orig->bound=orig->base+BLOCKSIZE(orig->localblocknum);
+#ifdef GC_CACHE_ADAPT
+ to->pagebound=to->base+BAMBOO_PAGE_SIZE;
+ orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
+#endif
+}
+
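+//Advance the destination iterator to this core's next local block.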
+void getSpaceLocally(struct moveHelper *to) {
+ //we have space on our core...just keep going
+ to->localblocknum++;
+ BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum);
+ to->ptr=to->base;
+ to->bound=to->base+BLOCKSIZE(to->localblocknum);
+#ifdef GC_CACHE_ADAPT
+ to->pagebound=to->base+BAMBOO_PAGE_SIZE;
+#endif
}
-// should be invoked with interrupt closed
-INLINE int assignSpareMem_I(unsigned int sourcecore,unsigned int * requiredmem,unsigned int * tomove,unsigned int * startaddr) {
- unsigned int b = 0;
- BLOCKINDEX(gcloads[sourcecore], b);
- unsigned int boundptr = BOUNDPTR(b);
- unsigned int remain = boundptr - gcloads[sourcecore];
- unsigned int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
- *startaddr = gcloads[sourcecore];
- *tomove = gcfilledblocks[sourcecore] + 1;
- if(memneed < remain) {
- gcloads[sourcecore] += memneed;
- return 0;
+//This function is called on the master core only...and typically by
+//the message interrupt handler
+
+void handleReturnMem_I(unsigned int cnum, void *heaptop) {
+ unsigned int blockindex;
+ BLOCKINDEX(blockindex, heaptop);
+ unsigned INTPTR localblocknum=GLOBALBLOCK2LOCAL(blockindex);
+ //this core is done as far as memory usage is concerned
+ returnedmem[cnum]=0;
+
+ struct blockrecord * blockrecord=&allocationinfo.blocktable[blockindex];
+
+ blockrecord->status=BS_FREE;
+ blockrecord->usedspace=(unsigned INTPTR)(heaptop-OFFSET2BASEVA(blockindex)-gcbaseva);
+ blockrecord->freespace=BLOCKSIZE(localblocknum)-blockrecord->usedspace;
+ /* Update the lowest free block */
+ if (blockindex < allocationinfo.lowestfreeblock) {
+ allocationinfo.lowestfreeblock=blockindex;
+ }
+
+  /* This is our own block...mark the rest of our blocks above it as free */
+
+ if (cnum==blockrecord->corenum) {
+ unsigned INTPTR nextlocalblocknum=localblocknum+1;
+ for(;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
+ unsigned INTPTR blocknum=BLOCKINDEX2(cnum, nextlocalblocknum);
+ struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
+ nextblockrecord->status=BS_FREE;
+ nextblockrecord->usedspace=0;
+      //BLOCKSIZE(1) is correct here...nextlocalblocknum>=1, so this can
+      //never be local block 0, the one block whose size differs
+      nextblockrecord->freespace=BLOCKSIZE(1);
+ }
+ }
+
+ //this could be the last one....
+ int count=gc_countRunningCores();
+ if (gcmovepending==count) {
+ // All cores have stopped...hand out memory as necessary to handle all requests
+ handleMemoryRequests_I();
} else {
- // next available block
- gcfilledblocks[sourcecore] += 1;
- unsigned int newbase = 0;
- BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
- gcloads[sourcecore] = newbase;
- return requiredmem-remain;
+ //see if returned memory blocks let us resolve requests
+ useReturnedMem(cnum, allocationinfo.lowestfreeblock);
}
}
-INLINE int assignSpareMem(unsigned int sourcecore,unsigned int * requiredmem,unsigned int * tomove,unsigned int * startaddr) {
- BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
- int retval=assignSpareMem_I(sourcecore, requiredmem, tomove, startaddr);
- BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
- return retval;
-}
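+//Try to satisfy pending memory requests with the blocks that core
+//retcorenum just returned, starting at localblockindex.  Only called
+//from handleReturnMem_I, so interrupts are already disabled.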
+void useReturnedMem(unsigned int retcorenum, block_t localblockindex) {
+ for(int i=0;i<NUMCORES4GC;i++) {
+ unsigned INTPTR requiredmem=gcrequiredmems[i];
+ if (requiredmem) {
+ unsigned INTPTR desiredmem=maxusefulmems[i];
+ unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
+ unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
-INLINE void compact2Heaptophelper_I(unsigned int coren,unsigned int* p,unsigned int* numblocks,unsigned int* remain) {
- unsigned int b;
- unsigned int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
- if(STARTUPCORE == coren) {
- gctomove = true;
- gcmovestartaddr = *p;
- gcdstcore = gctopcore;
- gcblock2fill = *numblocks + 1;
- } else {
- if(BAMBOO_CHECK_SEND_MODE()) {
- cache_msg_4_I(coren,GCMOVESTART,gctopcore,*p,(*numblocks)+1);
- } else {
- send_msg_4_I(coren,GCMOVESTART,gctopcore,*p,(*numblocks)+1);
+
+ for(block_t nextlocalblocknum=localblockindex;nextlocalblocknum<numblockspercore;nextlocalblocknum++) {
+ unsigned INTPTR blocknum=BLOCKINDEX2(retcorenum, nextlocalblocknum);
+ struct blockrecord * nextblockrecord=&allocationinfo.blocktable[blocknum];
+ if (nextblockrecord->status==BS_FREE) {
+ unsigned INTPTR freespace=nextblockrecord->freespace&~BAMBOO_CACHE_LINE_MASK;
+ if (freespace>=memcheck) {
+ nextblockrecord->status=BS_USED;
+ void *blockptr=OFFSET2BASEVA(blocknum)+gcbaseva;
+ unsigned INTPTR usedspace=((nextblockrecord->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
+ //taken care of one block
+ gcmovepending--;
+ void *startaddr=blockptr+usedspace;
+ gcrequiredmems[i]=0;
+ maxusefulmems[i]=0;
+ if (i==STARTUPCORE) {
+ gctomove = true;
+ gcmovestartaddr = startaddr;
+ } else if(BAMBOO_CHECK_SEND_MODE()) {
+ cache_msg_2_I(i,GCMOVESTART,startaddr);
+ } else {
+ send_msg_2_I(i,GCMOVESTART,startaddr);
+          }
+          //request satisfied...no need to keep scanning this core's blocks
+          break;
+ }
+ }
+ }
}
}
- if(memneed < *remain) {
- *p = *p + memneed;
- gcrequiredmems[coren] = 0;
- gcloads[gctopcore] += memneed;
- *remain = *remain - memneed;
- } else {
- // next available block
- *p = *p + *remain;
- gcfilledblocks[gctopcore] += 1;
- unsigned int newbase = 0;
- BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
- gcloads[gctopcore] = newbase;
- gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
- gcstopblock[gctopcore]++;
- gctopcore = NEXTTOPCORE(gctopblock);
- gctopblock++;
- *numblocks = gcstopblock[gctopcore];
- *p = gcloads[gctopcore];
- BLOCKINDEX(*p, b);
- *remain=GC_BLOCK_REMAIN_SIZE(b, (*p));
- }
- gcmovepending--;
-}
+}
-INLINE void compact2Heaptop() {
- // no cores with spare mem and some cores are blocked with pending move
- // find the current heap top and make them move to the heap top
- unsigned int p;
- unsigned int numblocks = gcfilledblocks[gctopcore];
- p = gcloads[gctopcore];
- unsigned int b;
- BLOCKINDEX(p, b);
- unsigned int remain=GC_BLOCK_REMAIN_SIZE(b, p);
- // check if the top core finishes
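+//Interrupt-safe wrapper for handleReturnMem_I.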
+void handleReturnMem(unsigned int cnum, void *heaptop) {
BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
- if(gccorestatus[gctopcore] != 0) {
- // let the top core finishes its own work first
- compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
- BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
- return;
- }
+ handleReturnMem_I(cnum, heaptop);
BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+}
- for(int i = 0; i < NUMCORES4GC; i++) {
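+//Get a destination block from another core.  The master core can assign
+//one to itself directly via handlegcfinishcompact_I...other cores send a
+//GCFINISHCOMPACT request and spin until the GCMOVESTART reply sets gctomove.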
+void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
+ //need to get another block from elsewhere
+ //set flag to wait for memory
+
+ if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
+ gctomove=false;
BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
- if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
- compact2Heaptophelper_I(i, &p, &numblocks, &remain);
- if(gccorestatus[gctopcore] != 0) {
- BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
- // the top core is not free now
- return;
- }
- }
+ void *startaddr=handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
- }
+
+ if (startaddr) {
+ gcmovestartaddr=startaddr;
+ } else {
+ while(!gctomove) ;
+ }
+ } else {
+ gctomove=false;
+ //send request for memory
+ send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, minimumbytes, gccurr_heaptop);
+ //wait for flag to be set that we received message
+ while(!gctomove)
+ ;
+ }
+
+ //store pointer
+ to->ptr = gcmovestartaddr;
+
+ //set localblock number to high number to indicate this block isn't local
+ to->localblocknum = MAXBLOCK;
+ unsigned int globalblocknum;
+ BLOCKINDEX(globalblocknum, to->ptr);
+ to->base = gcbaseva + OFFSET2BASEVA(globalblocknum);
+ to->bound=gcbaseva+BOUNDPTR(globalblocknum);
+#ifdef GC_CACHE_ADAPT
+ to->pagebound=(void *)((int)((int)(to->ptr)&(~(BAMBOO_PAGE_SIZE-1)))+BAMBOO_PAGE_SIZE);
+#endif
}
-INLINE void resolvePendingMoveRequest() {
- int i;
- int j;
- bool nosparemem = true;
- bool haspending = false;
- bool hasrunning = false;
- bool noblock = false;
- unsigned int dstcore = 0; // the core who need spare mem
- unsigned int sourcecore = 0; // the core who has spare mem
- for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
- if(nosparemem) {
- // check if there are cores with spare mem
- if(gccorestatus[i] == 0) {
- // finished working, check if it still have spare mem
- if(gcfilledblocks[i] < gcstopblock[i]) {
- // still have spare mem
- nosparemem = false;
- sourcecore = i;
- }
- }
- i++;
- }
- if(!haspending) {
- if(gccorestatus[j] != 0) {
- // not finished, check if it has pending move requests
- if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
- dstcore = j;
- haspending = true;
- } else {
- hasrunning = true;
- }
- }
- j++;
- }
- if(!nosparemem && haspending) {
- // find match
- unsigned int tomove = 0;
- unsigned int startaddr = 0;
- gcrequiredmems[dstcore] = assignSpareMem(sourcecore,gcrequiredmems[dstcore],&tomove,&startaddr);
- if(STARTUPCORE == dstcore) {
- gcdstcore = sourcecore;
- gctomove = true;
- gcmovestartaddr = startaddr;
- gcblock2fill = tomove;
+void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
+ //need more space to compact into
+ if ((to->localblocknum+1) < gcblock2fill) {
+ getSpaceLocally(to);
+ } else {
+ getSpaceRemotely(to, minimumbytes);
+ }
+}
+
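+//Compact this core's local blocks one at a time.  Once the bytes left to
+//move (gccurr_heaptop) fit into the current destination block, tell the
+//master core that the rest of our space can be handed out to other cores.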
+void compacthelper(struct moveHelper * orig,struct moveHelper * to) {
+ bool senttopmessage=false;
+ while(true) {
+ if ((gccurr_heaptop <= ((unsigned INTPTR)(to->bound-to->ptr)))&&!senttopmessage) {
+ //This block is the last for this core...let the startup know
+ if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
+ handleReturnMem(BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
} else {
- send_msg_4(dstcore,GCMOVESTART,sourcecore,startaddr,tomove);
+ send_msg_3(STARTUPCORE, GCRETURNMEM, BAMBOO_NUM_OF_CORE, to->ptr+gccurr_heaptop);
}
- gcmovepending--;
- nosparemem = true;
- haspending = false;
- noblock = true;
+ //Only send the message once
+ senttopmessage=true;
}
- }
-
- if(!hasrunning && !noblock) {
- gc_status_info.gcphase = SUBTLECOMPACTPHASE;
- compact2Heaptop();
- }
-}
-
-// If out of boundary of valid shared memory, return false, else return true
-INLINE bool nextSBlock(struct moveHelper * orig) {
- orig->blockbase = orig->blockbound;
-
- bool sbchanged = false;
- unsigned int origptr = orig->ptr;
- unsigned int blockbase = orig->blockbase;
- unsigned int blockbound = orig->blockbound;
- unsigned int bound = orig->bound;
-outernextSBlock:
- // check if across a big block
- // TODO now do not zero out the whole memory, maybe the last two conditions
- // are useless now
- if((blockbase>=bound)||(origptr>=bound)||((origptr!=NULL)&&(*((int*)origptr))==0)||((*((int*)blockbase))==0)) {
- innernextSBlock:
- // end of current heap block, jump to next one
- orig->numblocks++;
- BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
- if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
- // out of boundary
- orig->ptr = orig->base; // set current ptr to out of boundary too
- return false;
+ unsigned int minimumbytes=COMPACTUNITS(orig, to);
+ if (orig->ptr==orig->bound) {
+ //need more data to compact
+ //increment the core
+ orig->localblocknum++;
+ BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
+ orig->ptr=orig->base;
+ orig->bound=orig->base+BLOCKSIZE(orig->localblocknum);
+#ifdef GC_CACHE_ADAPT
+ orig->pagebound=orig->base+BAMBOO_PAGE_SIZE;
+#endif
+ if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE)
+ break;
}
- orig->blockbase = orig->base;
- orig->sblockindex=(unsigned int)(orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
- sbchanged = true;
- unsigned int blocknum = 0;
- BLOCKINDEX(orig->base, blocknum);
- if(bamboo_smemtbl[blocknum] == 0) {
- // goto next block
- goto innernextSBlock;
+ if (minimumbytes!=0) {
+ getSpace(to, minimumbytes);
}
- // check the bamboo_smemtbl to decide the real bound
- orig->bound = orig->base + bamboo_smemtbl[blocknum];
- } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
- orig->sblockindex += 1;
- sbchanged = true;
- }
-
- // check if this sblock should be skipped or have special start point
- int sbstart = gcsbstarttbl[orig->sblockindex];
- if(sbstart == -1) {
- // goto next sblock
- orig->sblockindex += 1;
- orig->blockbase += BAMBOO_SMEM_SIZE;
- goto outernextSBlock;
- } else if((sbstart != 0) && (sbchanged)) {
- // the first time to access this SBlock
- // not start from the very beginning
- orig->blockbase = sbstart;
- }
-
- // setup information for this sblock
- orig->blockbound = orig->blockbase+(unsigned int)*((int*)(orig->blockbase));
- orig->offset = BAMBOO_CACHE_LINE_SIZE;
- orig->ptr = orig->blockbase + orig->offset;
- if(orig->ptr >= orig->bound) {
- // met a lobj, move to next block
- goto innernextSBlock;
}
-
- return true;
-}
-
-// return false if there are no available data to compact
-INLINE bool initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
- // init the dst ptr
- to->numblocks = 0;
- to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
- to->bound = BAMBOO_SMEM_SIZE_L;
- BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
-
- unsigned int tobase = to->base;
- to->ptr = tobase + to->offset;
-
- // init the orig ptr
- orig->numblocks = 0;
- orig->base = tobase;
- unsigned int blocknum = 0;
- BLOCKINDEX(orig->base, blocknum);
- unsigned int origbase = orig->base;
- // check the bamboo_smemtbl to decide the real bound
- orig->bound = origbase + (unsigned int)bamboo_smemtbl[blocknum];
- orig->blockbase = origbase;
- orig->sblockindex = (unsigned int)(origbase - gcbaseva) / BAMBOO_SMEM_SIZE;
-
- int sbstart = gcsbstarttbl[orig->sblockindex];
- if(sbstart == -1) {
- // goto next sblock
- orig->blockbound=gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
- return nextSBlock(orig);
- } else if(sbstart != 0) {
- orig->blockbase = sbstart;
+ if (BAMBOO_NUM_OF_CORE==STARTUPCORE) {
+ BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+ handlegcfinishcompact_I(BAMBOO_NUM_OF_CORE, 0, 0);
+ BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+ } else {
+ send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, 0, 0);
}
- orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
- orig->offset = BAMBOO_CACHE_LINE_SIZE;
- orig->ptr = orig->blockbase + orig->offset;
-
- return true;
}
-INLINE void nextBlock(struct moveHelper * to) {
- to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
- to->bound += BAMBOO_SMEM_SIZE;
- to->numblocks++;
- BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
- to->offset = BAMBOO_CACHE_LINE_SIZE;
- to->ptr = to->base + to->offset;
-}
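+//Look for a free block on one of ncorenum's neighboring cores (the
+//core2test table) so that assigned memory stays close to the requester.
+//Must be called with interrupts disabled.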
+void * checkNeighbors_I(int ncorenum, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
+ int minblockindex=allocationinfo.lowestfreeblock/NUMCORES4GC;
+ unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
+ unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
-INLINE unsigned int findValidObj(struct moveHelper * orig,struct moveHelper * to,int * type) {
- unsigned int size = 0;
- while(true) {
- CACHEADAPT_COMPLETE_PAGE_CONVERT(orig, to, to->ptr, false);
- unsigned int origptr = (unsigned int)(orig->ptr);
- unsigned int origbound = (unsigned int)orig->bound;
- unsigned int origblockbound = (unsigned int)orig->blockbound;
- if((origptr >= origbound) || (origptr == origblockbound)) {
- if(!nextSBlock(orig)) {
- // finished, no more data
- return -1;
+ for(block_t lblock=minblockindex;lblock<numblockspercore;lblock++) {
+ for(int i=0;i<NUM_CORES2TEST;i++) {
+ int neighborcore=core2test[ncorenum][i];
+ if (neighborcore!=-1) {
+ block_t globalblockindex=BLOCKINDEX2(neighborcore, lblock);
+ struct blockrecord * block=&allocationinfo.blocktable[globalblockindex];
+ if (block->status==BS_FREE) {
+ unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
+ if (memcheck<=freespace) {
+ //we have a block
+ //mark block as used
+ block->status=BS_USED;
+ void *blockptr=OFFSET2BASEVA(globalblockindex)+gcbaseva;
+ unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
+ return blockptr+usedspace;
+ }
+ }
}
- continue;
}
- // check the obj's type, size and mark flag
- *type = ((int *)(origptr))[0];
- size = 0;
- if(*type == 0) {
- // end of this block, go to next one
- if(!nextSBlock(orig)) {
- // finished, no more data
- return -1;
+ }
+ return NULL;
+}
+
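+//Fall back to scanning every block below topblock for enough free space...
+//also refreshes allocationinfo.lowestfreeblock along the way.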
+void * globalSearch_I(unsigned int topblock, unsigned INTPTR requiredmem, unsigned INTPTR desiredmem) {
+ unsigned int firstfree=NOFREEBLOCK;
+ unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
+ unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
+
+ for(block_t i=allocationinfo.lowestfreeblock;i<topblock;i++) {
+ struct blockrecord * block=&allocationinfo.blocktable[i];
+ if (block->status==BS_FREE) {
+ if(firstfree==NOFREEBLOCK)
+ firstfree=i;
+ unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
+ if (memcheck<=freespace) {
+ //we have a block
+ //mark block as used
+ block->status=BS_USED;
+ void *blockptr=OFFSET2BASEVA(i)+gcbaseva;
+ unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
+ allocationinfo.lowestfreeblock=firstfree;
+ return blockptr+usedspace;
}
- continue;
- } else if(*type < NUMCLASSES) {
- // a normal object
- size = classsize[*type];
- } else {
- // an array
- struct ArrayObject *ao=(struct ArrayObject *)(origptr);
- unsigned int elementsize=classsize[*type];
- unsigned int length=ao->___length___;
- size=(unsigned int)sizeof(struct ArrayObject)+(unsigned int)(length*elementsize);
}
- return size;
}
+ allocationinfo.lowestfreeblock=firstfree;
+ return NULL;
}
-// endaddr does not contain spaces for headers
-INLINE bool moveobj(struct moveHelper * orig, struct moveHelper * to, unsigned int stopblock) {
- if(stopblock == 0) {
- return true;
- }
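+//Assign a free block to the pending request from core and send it the
+//GCMOVESTART message.  Called with interrupts disabled, from
+//handleMemoryRequests_I.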
+void handleOneMemoryRequest(int core, unsigned int lowestblock) {
+ unsigned INTPTR requiredmem=gcrequiredmems[core];
+ unsigned INTPTR desiredmem=maxusefulmems[core];
+ block_t firstfree=NOFREEBLOCK;
+ unsigned INTPTR threshold=(desiredmem<MINMEMORYCHUNKSIZE)? desiredmem: MINMEMORYCHUNKSIZE;
+ unsigned INTPTR memcheck=requiredmem>threshold?requiredmem:threshold;
- int type = 0;
- unsigned int size = findValidObj(orig, to, &type);
- unsigned int isize = 0;
+ for(block_t searchblock=lowestblock;searchblock<GCNUMBLOCK;searchblock++) {
+ struct blockrecord * block=&allocationinfo.blocktable[searchblock];
+ if (block->status==BS_FREE) {
+ if(firstfree==NOFREEBLOCK)
+ firstfree=searchblock;
+ //don't take a block from another core that hasn't returned its memory yet
+ if (block->corenum!=core&&returnedmem[block->corenum])
+ continue;
+
+ unsigned INTPTR freespace=block->freespace&~BAMBOO_CACHE_LINE_MASK;
+ if (freespace>=memcheck) {
+ //TODO: should check memory block at same level on our own core...if that works, use it to preserve locality
- if(size == -1) {
- // finished, no more data
- return true;
- }
- ALIGNSIZE(size, &isize); // no matter is the obj marked or not
- // should be able to across
- void * origptr = orig->ptr;
- int markedstatus;
- GETMARKED(markedstatus, origptr);
-
- if(markedstatus==MARKEDFIRST) {
- unsigned int totop = (unsigned int)to->top;
- unsigned int tobound = (unsigned int)to->bound;
- BAMBOO_ASSERT(totop<=tobound);
- GCPROFILE_RECORD_LIVE_OBJ();
- // marked obj, copy it to current heap top
- // check to see if remaining space is enough
- if((unsigned int)(totop + isize) > tobound) {
- // fill 0 indicating the end of this block
- BAMBOO_MEMSET_WH(to->ptr, '\0', tobound - totop);
- // fill the header of this block and then go to next block
- to->offset += tobound - totop;
- CLOSEBLOCK(to->base, to->offset);
-#ifdef GC_CACHE_ADAPT
- unsigned int tmp_ptr = to->ptr;
-#endif
- nextBlock(to);
- if((to->top+isize)>(to->bound)) tprintf("%x, %x, %d, %d, %d, %d \n", to->ptr, orig->ptr, to->top, to->bound, isize, size);
- BAMBOO_ASSERT((to->top+isize)<=(to->bound));
-#ifdef GC_CACHE_ADAPT
- CACHEADAPT_COMPLETE_PAGE_CONVERT(orig, to, tmp_ptr, true);
-#endif
- if(stopblock == to->numblocks) {
- // already fulfilled the block
- return true;
- }
- }
- BAMBOO_ASSERT((to->top+isize)<=(to->bound));
- // set the mark field to 2, indicating that this obj has been moved
- // and need to be flushed
- unsigned int toptr = (unsigned int)to->ptr;
- if(toptr != origptr) {
- if((unsigned int)(origptr) < (unsigned int)(toptr+size)) {
- memmove(toptr, origptr, size);
- } else {
- memcpy(toptr, origptr, size);
+ //we have a block
+ //mark block as used
+ block->status=BS_USED;
+ void *blockptr=OFFSET2BASEVA(searchblock)+gcbaseva;
+ unsigned INTPTR usedspace=((block->usedspace-1)&~BAMBOO_CACHE_LINE_MASK)+BAMBOO_CACHE_LINE_SIZE;
+ allocationinfo.lowestfreeblock=firstfree;
+ //taken care of one block
+ gcmovepending--;
+ void *startaddr=blockptr+usedspace;
+ if(BAMBOO_CHECK_SEND_MODE()) {
+ cache_msg_2_I(core,GCMOVESTART,startaddr);
+ } else {
+ send_msg_2_I(core,GCMOVESTART,startaddr);
+ }
+ return;
}
- // fill the remaining space with -2
- BAMBOO_MEMSET_WH((unsigned int)(toptr+size), -2, isize-size);
- }
- // store mapping info
- gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)]=(unsigned int)toptr;
- gccurr_heaptop -= isize;
- to->ptr += isize;
- to->offset += isize;
- to->top += isize;
- BAMBOO_ASSERT((to->top)<=(to->bound));
-#ifdef GC_CACHE_ADAPT
- unsigned int tmp_ptr = to->ptr;
-#endif // GC_CACHE_ADAPT
- if(to->top == to->bound) {
- CLOSEBLOCK(to->base, to->offset);
- nextBlock(to);
}
-#ifdef GC_CACHE_ADAPT
- CACHEADAPT_COMPLETE_PAGE_CONVERT(orig, to, tmp_ptr, true);
-#endif
- }
-
- // move to next obj
- orig->ptr += isize;
+ }
+ //this is bad...ran out of memory
+  printf("Out of memory. Was trying for %u bytes\n", memcheck);
+ BAMBOO_EXIT();
+}
+
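+//Once every core is blocked waiting for memory, hand out the remaining
+//free blocks to the pending requests, starting at the lowest free block.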
+void handleMemoryRequests_I() {
+ unsigned int lowestblock=allocationinfo.lowestfreeblock;
+ if (lowestblock==NOFREEBLOCK) {
+ lowestblock=numblockspercore*NUMCORES4GC;
+ }
- return ((((unsigned int)(orig->ptr) > (unsigned int)(orig->bound))||((unsigned int)(orig->ptr) == (unsigned int)(orig->blockbound)))&&!nextSBlock(orig));
-}
+ for(int i=0;i < NUMCORES4GC; i++) {
+ if (gcrequiredmems[i]) {
+ handleOneMemoryRequest(i, lowestblock);
+ lowestblock=allocationinfo.lowestfreeblock;
+ }
+ }
+}
-// should be invoked with interrupt closed
-bool gcfindSpareMem_I(unsigned int * startaddr,unsigned int * tomove,unsigned int * dstcore,unsigned int requiredmem,unsigned int requiredcore) {
- for(int k = 0; k < NUMCORES4GC; k++) {
- if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
- // check if this stopped core has enough mem
- assignSpareMem_I(k, requiredmem, tomove, startaddr);
- *dstcore = k;
- return true;
+/* Should be invoked with interrupts turned off */
+
+void * gcfindSpareMem_I(unsigned INTPTR requiredmem, unsigned INTPTR desiredmem,unsigned int requiredcore) {
+ if (allocationinfo.lowestfreeblock!=NOFREEBLOCK) {
+ //There are spare blocks
+ unsigned int topblock=numblockspercore*NUMCORES4GC;
+ void *memblock;
+
+    if ((memblock=checkNeighbors_I(requiredcore, requiredmem, desiredmem))) {
+      return memblock;
+    } else if ((memblock=globalSearch_I(topblock, requiredmem, desiredmem))) {
+ return memblock;
}
}
- // if can not find spare mem right now, hold the request
+
+ // If we cannot find spare mem right now, hold the request
gcrequiredmems[requiredcore] = requiredmem;
+ maxusefulmems[requiredcore]=desiredmem;
gcmovepending++;
- return false;
+
+ int count=gc_countRunningCores();
+ if (gcmovepending==count) {
+ // All cores have stopped...hand out memory as necessary to handle all requests
+ handleMemoryRequests_I();
+ }
+
+ return NULL;
}
-bool gcfindSpareMem(unsigned int * startaddr,unsigned int * tomove,unsigned int * dstcore,unsigned int requiredmem,unsigned int requiredcore) {
- BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
- bool retval=gcfindSpareMem_I(startaddr, tomove, dstcore, requiredmem, requiredcore);
- BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
- return retval;
-}
+#ifdef GC_CACHE_ADAPT
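+//GC_CACHE_ADAPT wrapper around compactblocks: walks the data one page at
+//a time so the cache sampling data can be migrated along with the objects
+//(CACHEADAPT_FINISH_SRC_PAGE/CACHEADAPT_FINISH_DST_PAGE).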
+unsigned int compactblockshelper(struct moveHelper * orig, struct moveHelper * to) {
+ unsigned int minimumbytes=0;
+ void *origptr=orig->ptr;
+ void *origbound=orig->bound;
+ void * tmporig=orig->ptr;
+ void * tmpto=to->ptr;
-bool compacthelper(struct moveHelper * orig,struct moveHelper * to,int * filledblocks,unsigned int * heaptopptr,bool * localcompact, bool lbmove) {
- bool loadbalancemove = lbmove;
- // scan over all objs in this block, compact the marked objs
- // loop stop when finishing either scanning all active objs or
- // fulfilled the gcstopblock
while(true) {
- while((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
- if(moveobj(orig, to, gcblock2fill)) {
- break;
- }
- }
- CACHEADAPT_SAMPLING_DATA_CONVERT(to->ptr);
- // if no objs have been compact, do nothing,
- // otherwise, fill the header of this block
- if(to->offset > (unsigned int)BAMBOO_CACHE_LINE_SIZE) {
- CLOSEBLOCK(to->base, to->offset);
- } else {
- to->offset = 0;
- to->ptr = to->base;
- to->top -= BAMBOO_CACHE_LINE_SIZE;
- }
- if(*localcompact) {
- *heaptopptr = to->ptr;
- *filledblocks = to->numblocks;
- }
-
- // send msgs to core coordinator indicating that the compact is finishing
- // send compact finish message to core coordinator
- if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
- gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
- gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
- //tprintf("--finish compact: %d, %d, %d, %x, %x \n", BAMBOO_NUM_OF_CORE, loadbalancemove, *filledblocks, *heaptopptr, gccurr_heaptop);
- if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
- // ask for more mem
- gctomove = false;
- if(gcfindSpareMem(&gcmovestartaddr,&gcblock2fill,&gcdstcore,gccurr_heaptop,BAMBOO_NUM_OF_CORE)) {
- gctomove = true;
- } else {
- return false;
- }
+ //call compactblocks using the page boundaries at the current bounds
+ minimumbytes=compactblocks(orig, to);
+ if(minimumbytes == 0) {
+ //bump the orig page bound...
+ //use old orig pointer to make sure we get correct block
+ CACHEADAPT_FINISH_SRC_PAGE(tmporig, tmpto, to->ptr);
+ if (orig->ptr<origbound) {
+ tmporig=orig->ptr;
+ tmpto=to->ptr;
+ orig->pagebound=orig->pagebound+BAMBOO_PAGE_SIZE;
} else {
- gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
- gctomove = false;
- // write back to the Main Memory and release any DTLB entry for the
- // last block as someone else might later write into it
- // flush the shared heap
- //BAMBOO_CACHE_FLUSH_L2();
- return true;
+ return 0;
}
} else {
- if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
- // ask for more mem
- gctomove = false;
- send_msg_6(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,loadbalancemove,*filledblocks,*heaptopptr,gccurr_heaptop);
+ // require more memory
+ void *endtoptr=to->ptr+minimumbytes;
+ if (endtoptr>to->bound) {
+ CACHEADAPT_FINISH_DST_PAGE(orig->ptr, tmpto, to->ptr, 0);
+ return minimumbytes;
} else {
- // finish compacting
- send_msg_6(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,loadbalancemove,*filledblocks,*heaptopptr, 0);
- // write back to the Main Memory and release any DTLB entry for the
- // last block as someone else might later write into it.
- // flush the shared heap
+ CACHEADAPT_FINISH_DST_PAGE(orig->ptr, tmpto, to->ptr, minimumbytes);
+        to->pagebound=(void *)((((unsigned INTPTR)endtoptr-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
+ //update pointers to avoid double counting the stuff we already added in
+ tmporig=orig->ptr+minimumbytes;
+ tmpto=to->ptr+minimumbytes;
}
}
+ }
+}
+#endif
+
+/* This function is performance critical... spend more time optimizing it */
+
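+//Note that this pass only computes forwarding addresses and records them
+//in gcmappingtbl while updating the byte counts...the object bodies
+//themselves are not copied here.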
+unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
+ void *toptrinit=to->ptr;
+ void *toptr=toptrinit;
+ void *origptr=orig->ptr;
+#ifdef GC_CACHE_ADAPT
+ void *origbound=orig->pagebound;
+ void *tobound=to->pagebound;
+#else
+ void *origbound=orig->bound;
+ void *tobound=to->bound;
+#endif
+ unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbaseva));
+ unsigned int objlength;
+
+ while(origptr<origbound) {
+ //Try to skip over stuff fast first
+ unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbaseva);
+ unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
+ if (!gcmarktbl[arrayoffset]) {
+ do {
+ arrayoffset++;
+ if (arrayoffset>=origendoffset) {
+          //finished with this block (a page in the CACHE_ADAPT version)...
+ to->ptr=toptr;
+ orig->ptr=origbound;
+ gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
+ return 0;
+ }
+ } while(!gcmarktbl[arrayoffset]);
+ origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
+ }
- if(orig->ptr < gcmarkedptrbound) {
- // still have unpacked obj
- while(!gctomove) ;
- BAMBOO_CACHE_MF();
- loadbalancemove = true;
+ //Scan more carefully next
+ objlength=getMarkedLength(origptr);
+
+ if (objlength!=NOTMARKED) {
+ unsigned int length=ALIGNSIZETOBYTES(objlength);
+
+ //code between this and next comment should be removed
+#ifdef GC_DEBUG
+ unsigned int size;
+ unsigned int type;
+ gettype_size(origptr, &type, &size);
+ size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;
+
+ if (size!=length) {
+ tprintf("BAD SIZE IN BITMAP: type=%u object=%x size=%u length=%u\n", type, origptr, size, length);
+ unsigned INTPTR alignsize=ALIGNOBJSIZE((unsigned INTPTR)(origptr-gcbaseva));
+ unsigned INTPTR hibits=alignsize>>4;
+ unsigned INTPTR lobits=(alignsize&15)<<1;
+ tprintf("hibits=%x lobits=%x\n", hibits, lobits);
+ tprintf("hi=%x lo=%x\n", gcmarktbl[hibits], gcmarktbl[hibits+1]);
+ }
+#endif
+ //end of code to remove
+
+ void *endtoptr=toptr+length;
+ if (endtoptr>tobound) {
+ gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
+ to->ptr=toptr;
+ orig->ptr=origptr;
+ return length;
+ }
+ //good to move objects and update pointers
+
+ gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;
- gctomove = false;
- to->ptr = gcmovestartaddr;
- to->numblocks = gcblock2fill - 1;
- to->bound = BLOCKBOUND(to->numblocks);
- BASEPTR(gcdstcore, to->numblocks, &(to->base));
- to->offset = to->ptr - to->base;
- to->top=(to->numblocks==0)?(to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
- to->base = to->ptr;
- to->offset = BAMBOO_CACHE_LINE_SIZE;
- to->ptr += to->offset; // for header
- to->top += to->offset;
- *localcompact = (gcdstcore == BAMBOO_NUM_OF_CORE);
- CACHEADAPT_SAMPLING_DATA_REVISE_INIT(orig, to);
+ origptr+=length;
+ toptr=endtoptr;
} else
- return true;
-}
+ origptr+=ALIGNMENTSIZE;
+ }
+ to->ptr=toptr;
+ orig->ptr=origptr;
+ gccurr_heaptop-=(unsigned INTPTR)(toptr-toptrinit);
+ return 0;
}
void compact() {
BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);
- BAMBOO_CACHE_MF();
- // initialize pointers for comapcting
- struct moveHelper * orig = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
- struct moveHelper * to = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
- if(!initOrig_Dst(orig, to)) {
- // no available data to compact
- // send compact finish msg to STARTUP core
- send_msg_6(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,false,0,to->base,0);
- RUNFREE(orig);
- RUNFREE(to);
- } else {
- CACHEADAPT_SAMPLING_DATA_REVISE_INIT(orig, to);
-
- unsigned int filledblocks = 0;
- unsigned int heaptopptr = 0;
- bool localcompact = true;
- compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact, false);
- RUNFREE(orig);
- RUNFREE(to);
- }
+ // initialize structs for compacting
+ struct moveHelper orig;
+ struct moveHelper to;
+ initOrig_Dst(&orig, &to);
+
+ CACHEADAPT_SAMPLING_DATA_REVISE_INIT(&orig, &to);
+ compacthelper(&orig, &to);
}
-void compact_master(struct moveHelper * orig, struct moveHelper * to) {
- // initialize pointers for comapcting
- initOrig_Dst(orig, to);
- CACHEADAPT_SAMPLING_DATA_REVISE_INIT(orig, to);
- int filledblocks = 0;
- unsigned int heaptopptr = 0;
- bool finishcompact = false;
- bool iscontinue = true;
- bool localcompact = true;
- bool lbmove = false;
- while((COMPACTPHASE == gc_status_info.gcphase) || (SUBTLECOMPACTPHASE == gc_status_info.gcphase)) {
- if((!finishcompact) && iscontinue) {
- finishcompact = compacthelper(orig,to,&filledblocks,&heaptopptr,&localcompact, lbmove);
- }
-
- if(gc_checkCoreStatus()) {
- // all cores have finished compacting restore the gcstatus of all cores
- gc_resetCoreStatus();
- break;
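+//Master-core driver for the compact phase: balance the load, hand out
+//blocks, compact locally, and wait for the other cores to finish.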
+void master_compact() {
+ // predict number of blocks to fill for each core
+ numblockspercore = loadbalance()+1;
+
+ GC_PRINTF("mark phase finished \n");
+
+ gc_resetCoreStatus();
+ //initialize local data structures first....we don't want remote requests messing data up
+ unsigned int initblocks=numblockspercore*NUMCORES4GC;
+ allocationinfo.lowestfreeblock=NOFREEBLOCK;
+
+ //assigned blocks
+ for(int i=0;i<initblocks;i++) {
+ allocationinfo.blocktable[i].status=BS_USED;
+ }
+
+ //free blocks
+ for(int i=initblocks;i<GCNUMBLOCK;i++) {
+ allocationinfo.blocktable[i].status=BS_FREE;
+ allocationinfo.blocktable[i].usedspace=0;
+ //this is true because all cores have at least one block already...
+ allocationinfo.blocktable[i].freespace=BLOCKSIZE(1);
+ }
+
+ //start all of the cores
+ for(int i = 0; i < NUMCORES4GC; i++) {
+    // init some data structures for the compact phase
+ gcrequiredmems[i] = 0;
+ gccorestatus[i] = 1;
+ returnedmem[i] = 1;
+ //send start compact messages to all cores
+ if(i != STARTUPCORE) {
+ send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
} else {
- // check if there are spare mem for pending move requires
- if(COMPACTPHASE == gc_status_info.gcphase) {
- resolvePendingMoveRequest();
- } else {
- compact2Heaptop();
+ gcblock2fill = numblockspercore;
+ }
+ }
+ GCPROFILE_ITEM();
+ // compact phase
+ compact();
+ /* wait for all cores to finish compacting */
+ while(!gc_checkCoreStatus())
+ ;
+
+#ifdef GC_DEBUG
+ void *nextvalid=gcbaseva;
+ for(void *tmp=gcbaseva; tmp<gcbaseva+BAMBOO_SHARED_MEM_SIZE;tmp+=ALIGNMENTSIZE) {
+ unsigned int objlength=getMarkedLength(tmp);
+ void *forwarding=gcmappingtbl[OBJMAPPINGINDEX(tmp)];
+ if (tmp>=nextvalid&&((objlength!=0)!=(forwarding!=NULL))) {
+ tprintf("Maps disagree tmp=%x olength=%u forwarding=%x\n",tmp, objlength, forwarding);
+ }
+ if (tmp<nextvalid&&forwarding!=NULL) {
+ tprintf("Weird forwarding pointer\n");
+ }
+ if (tmp>=nextvalid&&(objlength!=0||forwarding!=NULL)) {
+ unsigned int length=ALIGNSIZETOBYTES(objlength);
+ unsigned int size;
+ unsigned int type;
+ nextvalid=tmp+length;
+ gettype_size(tmp, &type, &size);
+ size=((size-1)&(~(ALIGNMENTSIZE-1)))+ALIGNMENTSIZE;
+ if (size!=length) {
+ tprintf("Bad size in bitmap: tmp=%x length=%u size=%u type=%u\n", tmp, length, size, type);
+ }
+ block_t blockindex;
+ BLOCKINDEX(blockindex, forwarding);
+ struct blockrecord * block=&allocationinfo.blocktable[blockindex];
+ void *blockptr=OFFSET2BASEVA(blockindex)+gcbaseva;
+
+ if (block->status==BS_FREE) {
+ if (forwarding>(blockptr+block->usedspace)) {
+ tprintf("Pointer references free space forwarding=%x tmp=%x length=%u type=%u blockindex=%u, baseptr=%x, usedspace=%u, status=%u\n", forwarding, tmp, length, type,blockindex, blockptr, block->usedspace, block->status);
+ }
}
- }
-
- if(gctomove) {
- BAMBOO_CACHE_MF();
- to->ptr = gcmovestartaddr;
- to->numblocks = gcblock2fill - 1;
- to->bound = BLOCKBOUND(to->numblocks);
- BASEPTR(gcdstcore, to->numblocks, &(to->base));
- to->offset = to->ptr - to->base;
- to->top = (to->numblocks==0)?(to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
- to->base = to->ptr;
- to->offset = BAMBOO_CACHE_LINE_SIZE;
- to->ptr += to->offset; // for header
- to->top += to->offset;
- localcompact = (gcdstcore == BAMBOO_NUM_OF_CORE);
- gctomove = false;
- iscontinue = true;
- lbmove = true;
- } else if(!finishcompact) {
- // still pending
- iscontinue = false;
- lbmove = false;
}
}
+#endif
+
+ GCPROFILE_ITEM();
+
+ //just in case we didn't get blocks back...
+ if (allocationinfo.lowestfreeblock==NOFREEBLOCK)
+ allocationinfo.lowestfreeblock=numblockspercore*NUMCORES4GC;
+
+ GC_PRINTF("compact phase finished \n");
}
#endif // MULTICORE_GC