fix distributed KMeans bugs and add javasingle version
[IRC.git] / Robust / src / Benchmarks / Prefetch / ManualPrefetch / KMeans / KMeans.java
index 038748185c04dda8275c86efb464903bcdb02fe3..40bce0057564d94a7844dae36407d994b9e3219d 100644 (file)
@@ -414,9 +414,7 @@ public class KMeans extends Thread {
           km.isBinaryFile = new Integer(args[i++]).intValue();
         }
       } else if(arg.equals("-z")) {
-        if(i < args.length) {
-
-        }
+        km.use_zscore_transform=0;
       } else if(arg.equals("-nthreads")) {
         if(i < args.length) {
           km.nthreads = new Integer(args[i++]).intValue();
@@ -457,40 +455,61 @@ public class KMeans extends Thread {
     int n;
     byte oldbytes[]=null;
 
-
     atomic {
+      j = -1;
       while ((n = inputFile.read(b)) != 0) {
-        j = -1;
         int x=0;
 
         if (oldbytes!=null) {
           //find space
+          boolean cr=false;
           for (;x < n; x++) {
             if (b[x] == ' ')
               break;
+            if(b[x] =='\n') {
+              cr=true;
+              break;
+            }
           }
           byte newbytes[]= new byte[x+oldbytes.length];
-          for(int ii=0;ii<oldbytes.length;ii++)
+          boolean isnumber=false;
+          for(int ii=0;ii<oldbytes.length;ii++) {
+            if (oldbytes[ii]>='0'&&oldbytes[ii]<='9')
+              isnumber=true;
             newbytes[ii]=oldbytes[ii];
-          for(int ii=0;ii<x;ii++)
+          }
+          for(int ii=0;ii<x;ii++) {
+            if (b[ii]>='0'&&b[ii]<='9')
+              isnumber=true;
             newbytes[ii+oldbytes.length]=b[ii];
-          x++; //skip past space
-          if (j>=0) {
-            buf[i][j]=(float)Double.parseDouble(new String(newbytes, 0, newbytes.length));
           }
-          j++;
+          if(x!=n)
+            x++; //skip past space
+          if(isnumber) {
+            if (j>=0) {
+              buf[i][j]=(float)Double.parseDouble(new String(newbytes, 0, newbytes.length));
+            }
+            j++;
+          }
+          if(cr) {
+            j=-1;
+            i++;
+          }
           oldbytes=null;
         }
 
         while (x < n) {
           int y=x;
+          boolean cr=false;
+          boolean isnumber=false;
           for(y=x;y<n;y++) {
+            if ((b[y]>='0')&&(b[y]<='9'))
+              isnumber=true;
             if (b[y]==' ')
               break;
             if (b[y]=='\n') {
-              i++;
-              j = -1;
-              x=y;//push end to current character
+              cr=true;
+              break;
             }
           }
           if (y==n) {
@@ -500,21 +519,28 @@ public class KMeans extends Thread {
               oldbytes[ii]=b[ii+x];
             break;
           }
-
           //otherwise x is beginning of character string, y is end
-          if (j>=0) {
-            buf[i][j]=(float)Double.parseDouble(new String(b,x,y-x));
+          if (isnumber) {
+            if (j>=0) {
+              buf[i][j]=(float)Double.parseDouble(new String(b,x,y-x));
+            }
+            j++;
+          }
+          if(cr) {
+            i++;//skip to next line
+            j = -1;//don't store line number
+            x=y;//skip to end of number
+            x++;//skip past return
+          } else {
+            x=y;//skip to end of number
+            x++;//skip past space
           }
-          x=y;//skip to end of number
-          x++;//skip past space
-          j++;
         }
       }
     }
     inputFile.close();
   }
 }
-
 /* =============================================================================
  *
  * End of kmeans.java