changes to Spider benchmark
author adash <adash>
Tue, 18 May 2010 22:29:16 +0000 (22:29 +0000)
committer adash <adash>
Tue, 18 May 2010 22:29:16 +0000 (22:29 +0000)
Robust/src/Benchmarks/Recovery/Spider/java/QueryTask.java
Robust/src/Benchmarks/Recovery/Spider/java/Spider.java
Robust/src/Benchmarks/Recovery/Spider/java/makefile
Robust/src/Benchmarks/Recovery/Spider/recovery/QueryTask.java
Robust/src/Benchmarks/Recovery/Spider/recovery/Spider.java
Robust/src/Benchmarks/Recovery/Spider/recovery/Task.java
Robust/src/Benchmarks/Recovery/Spider/recovery/TaskSet.java
Robust/src/Benchmarks/Recovery/Spider/recovery/Worker.java
Robust/src/Benchmarks/Recovery/Spider/recovery/dstm.conf [deleted file]
Robust/src/Benchmarks/Recovery/Spider/recovery/makefile

index a00b4d297b1a253094ae0e01e47e701b3811782f..9b44deb6629b1a4a7981b59cdbe78930f5b73304 100644 (file)
@@ -49,31 +49,23 @@ public class QueryTask {
                        //System.printString(path);
                        //System.printString("\n");
 
-                       if (isDocument(path)) {
-                               lq = (LocalQuery)(todoList.pop());
-                               depth = lq.getDepth();
-                               continue;
-                       }
-
                        Socket s = new Socket();
 
                        if(s.connect(hostname, 80) == -1) {
-                               lq = (LocalQuery)(todoList.pop());
-                               depth = lq.getDepth();
-                               continue;
-                       }
-
-//                     System.out.println("AAA");
-                       requestQuery(hostname, path, s);
-//                     System.out.println("BBB");
-                       readResponse(lq, s);
-
-//                     System.out.println("CCC");
-                       if ((title = grabTitle(lq)) != null) {
-                               toprocess = processPage(lq);
+                               //lq = (LocalQuery)(todoList.pop());
+                               //depth = lq.getDepth();
+                               //continue;
+                return;
                        }
-//                     System.out.println("DDD");
 
+                       if(requestQuery(hostname, path, s) == 0) {
+              readResponse(lq, s);
+              if ((title = grabTitle(lq)) != null) {
+                toprocess = processPage(lq);
+              }
+            } else {
+              ;
+            }
                        s.close();
                        done(toprocess);
                        lq = (LocalQuery)(todoList.pop());
@@ -102,10 +94,11 @@ public class QueryTask {
        }
 
        public void done(Queue toprocess) {
+        /*
                if ((title != null) && (title.length() > 0)) {
                        processedList();
                }
-
+        */
                int searchCnt = 0;
                while(!toprocess.isEmpty()) {
                        LocalQuery q = (LocalQuery)toprocess.pop();
@@ -134,7 +127,6 @@ public class QueryTask {
 
                while (iter.hasNext() == true) {
                        str = ((String)(iter.next()));
-                       //System.printString(str + "\n");
                }
        }
 
@@ -212,16 +204,20 @@ public class QueryTask {
                else
                        return false;
        }
-       
-       public static void requestQuery(String hostname, String path, Socket sock) {
-    StringBuffer req = new StringBuffer("GET "); 
-    req.append("/");
-               req.append(path);
-         req.append(" HTTP/1.0\r\nHost: ");
-    req.append(hostname);
-    req.append("\r\n\r\n");
-    sock.write(req.toString().getBytes());
-  }
+
+    public static int requestQuery(String hostname, String path, Socket sock) {
+      StringBuffer req = new StringBuffer("GET "); 
+      req.append("/");
+      req.append(path);
+      req.append(" HTTP/1.0\r\nHost: ");
+      req.append(hostname);
+      req.append("\r\n\r\n");
+      if(sock.write(req.toString().getBytes()) == -1) {
+        return -1;
+      } else {
+        return 0;
+      }
+    }
 
        public static void readResponse(LocalQuery lq, Socket sock) {
        //    state 0 - nothing
@@ -344,39 +340,38 @@ public class QueryTask {
                return str;
        }
 
-  public static Queue processPage(LocalQuery lq) {
-    int index = 0;
-       String href = new String("href=\"");
-       String searchstr = lq.response.toString();
-               int depth;
-       boolean cont = true;
-               Queue toprocess;
-
-               depth = lq.getDepth() + 1;
-
-               toprocess = new Queue();
-               while(cont) {
-                       int mindex = searchstr.indexOf(href,index);
-                       if (mindex != -1) {     
-                               int endquote = searchstr.indexOf('"', mindex+href.length());
-               if (endquote != -1) {
-                     String match = searchstr.subString(mindex+href.length(), endquote);
-                                       String match2 = lq.makewebcanonical(match);
-       
-                                       String hostname;
-                                       String path;
-
-                                       hostname = new String(lq.getHostName(match));
-                                       path = new String(lq.getPathName(match));
-
-                     if (match2 != null) {
-                                                       LocalQuery gq = new LocalQuery(hostname, path, depth);
-                                                       toprocess.push(gq);
-                                       }
-                                       index = endquote;
-        } else cont = false;
-      } else cont = false;
+    public static Queue processPage(LocalQuery lq) {
+      int index = 0;
+      String href = new String("href=\"");
+      String searchstr = lq.response.toString();
+      int depth;
+      Queue toprocess;
+
+      depth = lq.getDepth() + 1;
+
+      toprocess = new Queue();
+      while(true) {
+        int mindex = searchstr.indexOf(href,index);
+        if (mindex != -1) {    
+          int endquote = searchstr.indexOf('"', mindex+href.length());
+          if (endquote != -1) {
+            String match = searchstr.subString(mindex+href.length(), endquote);
+            String match2 = lq.makewebcanonical(match);
+
+            String hostname;
+            String path;
+
+            hostname = new String(lq.getHostName(match));
+            path = new String(lq.getPathName(match));
+
+            if (match2 != null) {
+              LocalQuery gq = new LocalQuery(hostname, path, depth);
+              toprocess.push(gq);
+            }
+            index = endquote;
+          } else break;
+        } else break;
+      }
+      return toprocess;
     }
-               return toprocess;
-  }
 }
index b65a95880f96dd7cf5c21dbd6e277f016554adf3..63d17e29d827f6c321f35a57099bba7e94030f78 100644 (file)
@@ -2,10 +2,11 @@ public class Spider {
        public static void main(String[] args) {
                int NUM_THREADS = 3;
                int maxDepth = 3;
-               int maxSearchDepth = 10;
+               int maxSearchDepth = 20;
                int i, j;
                QueryTask qt;
-        String fm = "www.uci.edu";
+        //String fm = "www.uci.edu";
+        String fm = "dc-11.calit2.uci.edu";
 
                String firstmachine;
                String firstpage;
@@ -20,7 +21,8 @@ public class Spider {
         }
 
                firstmachine = new String(fm);
-        firstpage = new String("");;
+        //firstpage = new String("");;
+        firstpage =  new String("1.html");
 
                HashMap visitedList = new HashMap(500, 0.75f);
                HashMap results = new HashMap(100, 0.75f);
index cff2943442d633db2646db1d8bd1bc659e9cc8cf..a2d79c155f8fa2d1e608496280c26c635fddf3be 100644 (file)
@@ -3,7 +3,7 @@ SUBCLASS=Query
 SRC1=${MAINCLASS}.java
 SRC2=Local${SUBCLASS}.java
 SRC3=${SUBCLASS}Task.java
-FLAGS= -optimize -thread -mainclass ${MAINCLASS}
+FLAGS=-debug -optimize -thread -mainclass ${MAINCLASS}
 default:
        ../../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC1}
 
index 79011c4d62a75cd37810b9792704316cff408deb..a7d570026fc48975eff4a18973a6c6b4f0ed2057 100644 (file)
@@ -27,58 +27,90 @@ public class QueryTask extends Task {
     int ldepth;
 
     atomic {
-      System.out.println("trans 2");
       max = this.maxDepth;
       maxSearch = this.maxSearchDepth;
       ldepth=this.depth;
     }
-    
+
     if (ldepth < max) {
       /* local variables */
-      String hostname;
-      String path;
-      String title;
-      
+      String hostname=null;
+      String path=null;
+      String title=null;
+
       atomic {
-      System.out.println("trans 3");
-       hostname = new String(GlobalString.toLocalCharArray(getHostName()));
-       path = new String(GlobalString.toLocalCharArray(getPath()));
-       
-       GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
-       gsb.append("/");
-       gsb.append(path);
-       workingURL = global new GlobalString(gsb.toGlobalString());
-       gTitle = null;
+        hostname = new String(GlobalString.toLocalCharArray(getHostName()));
+        path = new String(GlobalString.toLocalCharArray(getPath()));
+        System.out.println("hostname= " + hostname + " path= " + path);
+        GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
+        gsb.append("/");
+        gsb.append(path);
+        workingURL = global new GlobalString(gsb.toGlobalString());
+        gTitle = null;
       }
       LocalQuery lq = new LocalQuery(hostname, path, ldepth);
 
+      /*
       if (isDocument(path)) {
-       return;
+        return;
       }
-      
+      */
+
       Socket s = new Socket();
 
       if(s.connect(hostname, 80) == -1) {
-       return;
+        return;
       }
-      
+
+      if(requestQuery(hostname, path, s) == 0) {
+        readResponse(lq, s);
+        if ((title = grabTitle(lq)) != null) {
+          atomic {
+            //commits everything...either works or fails
+            gTitle = global new GlobalString(title);
+            processPage(lq);
+            dequeueTask();
+          }
+        }
+      } else {
+        atomic {
+          dequeueTask();
+        }
+      }
+
+      /*
+      if(requestQuery(hostname, path, s) == -1) {
+        atomic {
+          dequeueTask();
+        }
+      } else {
+        readResponse(lq, s);
+        if ((title = grabTitle(lq)) != null) {
+          atomic {
+            //commits everything...either works or fails
+            gTitle = global new GlobalString(title);
+            processPage(lq);
+            dequeueTask();
+          }
+        }
+      }
+      */
+      /*
       requestQuery(hostname, path, s);
       readResponse(lq, s);
-      
       if ((title = grabTitle(lq)) != null) {
-       atomic {
-      System.out.println("trans 4");
-         //commits everything...either works or fails
-         gTitle = global new GlobalString(title);
-         processPage(lq);
-         dequeueTask();
-       }
+        atomic {
+          //commits everything...either works or fails
+          gTitle = global new GlobalString(title);
+          processPage(lq);
+          dequeueTask();
+        }
       }
+      */
       s.close();
     } else {
       atomic {
-      System.out.println("trans 5");
-       dequeueTask();
+        dequeueTask();
       }
     }
   }
@@ -130,8 +162,8 @@ public class QueryTask extends Task {
   public GlobalString getPathName(GlobalString page) {
     GlobalString http = global new GlobalString("http://");
     GlobalString https = global new GlobalString("https://");
-    int beginindex;
-    int nextindex;
+    int beginindex=0;
+    int nextindex=0;
     
     if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) {
       GlobalString path = getPath();
@@ -148,7 +180,6 @@ public class QueryTask extends Task {
       beginindex = page.indexOf(http) + http.length();
     }
     nextindex = page.indexOf('/',beginindex+1);
-    
     if ((beginindex == -1) || (nextindex == -1))
       return global new GlobalString("index.html");
     return page.subString(nextindex+1, page.length());
@@ -156,7 +187,6 @@ public class QueryTask extends Task {
   
   public static boolean isDocument(String str) {
     int index = str.lastindexOf('.');
-    
     if (index != -1) {
       if ((str.subString(index+1)).equals("pdf")) return true;
       else if ((str.subString(index+1)).equals("ps")) return true;
@@ -176,12 +206,14 @@ public class QueryTask extends Task {
     return false;
   }
   
+  /*
   public void output() {
     String str;
     Iterator iter = results_list.iterator();
     
     System.out.println("Size = " + results_list.size());
   }
+  */
 
   public static String grabTitle(LocalQuery lq) {
     String sBrace = new String("<");   
@@ -220,26 +252,28 @@ public class QueryTask extends Task {
     if (mindex != -1) {
       title = searchstr.subString(mindex, endquote);
       if (Character.isWhitespace(title.charAt(0))){
-       mindex=0;
-       while (Character.isWhitespace(title.charAt(mindex++)));
-       mindex--;
-       if (mindex >= title.length()) return null;
-       title = new String(title.subString(mindex));
+        mindex=0;
+        while (Character.isWhitespace(title.charAt(mindex++)));
+        mindex--;
+        if (mindex >= title.length()) return null;
+        title = new String(title.subString(mindex));
       }
-      
+
       if (Character.isWhitespace(title.charAt(title.length()-1))) {
-       endquote=title.length()-1;
-       while (Character.isWhitespace(title.charAt(endquote--)));
-       endquote += 2;
-       if (mindex >= endquote) return null;
-       title = new String(title.subString(0, endquote));
+        endquote=title.length()-1;
+        while (Character.isWhitespace(title.charAt(endquote--)));
+        endquote += 2;
+        if (mindex >= endquote) {
+          return null;
+        }
+        title = new String(title.subString(0, endquote));
       }
-      
+
       if (isErrorPage(title)) {
-       return null;
+        return null;
       }
     }
-    
+
     return title;
   }
 
@@ -258,14 +292,18 @@ public class QueryTask extends Task {
       return false;
   }
   
-  public static void requestQuery(String hostname, String path, Socket sock) {
+  public static int requestQuery(String hostname, String path, Socket sock) {
     StringBuffer req = new StringBuffer("GET "); 
     req.append("/");
     req.append(path);
     req.append(" HTTP/1.0\r\nHost: ");
     req.append(hostname);
     req.append("\r\n\r\n");
-    sock.write(req.toString().getBytes());
+    if(sock.write(req.toString().getBytes()) == -1) {
+      return -1; //error in openning this webpage
+    } else { 
+      return 0;
+    }
   }
   
   public static void readResponse(LocalQuery lq, Socket sock) {
@@ -290,25 +328,25 @@ public class QueryTask extends Task {
     GlobalString token = null;
     int mindex = 0;
     int endquote = 0;
-    
+
     while (endquote != -1) {
       endquote = gTitle.indexOf(' ', mindex);
-      
+
       if (endquote != -1) {
-       token = gTitle.subString(mindex, endquote);
-       mindex = endquote + 1;
-       if (filter(token)) {
-         continue;
-       }
-       token = refine(token);
+        token = gTitle.subString(mindex, endquote);
+        mindex = endquote + 1;
+        if (filter(token)) {
+          continue;
+        }
+        token = refine(token);
       } else {
-       token = gTitle.subString(mindex);
-       token = refine(token);
+        token = gTitle.subString(mindex);
+        token = refine(token);
       }
-      
+
       GlobalQueue q = (GlobalQueue)results.get(token);
       if (q == null) {
-       q = global new GlobalQueue();
+        q = global new GlobalQueue();
       }
       q.push(workingURL);      
       results.put(token, q);
@@ -380,11 +418,14 @@ public class QueryTask extends Task {
     return str;
   }
   
-  
+
   public void processPage(LocalQuery lq) {
+    //System.out.println("Inside processPage");
+    /*
     if ((gTitle != null) && (gTitle.length() > 0)) {
       processedList();
     }
+    */
 
     int index = 0;
     String href = new String("href=\"");
@@ -393,36 +434,45 @@ public class QueryTask extends Task {
     while(true) {
       int mindex = searchstr.indexOf(href,index);
       if (mindex != -1) {      
-       int endquote = searchstr.indexOf('"', mindex+href.length());
-       if (endquote != -1) {
-         String match = searchstr.subString(mindex+href.length(), endquote);
-         String match2 = lq.makewebcanonical(match);
-         
-         GlobalString ghostname;
-         GlobalString gpath;
-         
-         ghostname = global new GlobalString(lq.getHostName(match));
-         gpath = global new GlobalString(lq.getPathName(match));
-         
-         GlobalStringBuffer gsb = global new GlobalStringBuffer(ghostname);
-         gsb.append("/");
-         gsb.append(gpath);
+        int endquote = searchstr.indexOf('"', mindex+href.length());
+        if (endquote != -1) {
+          String match = searchstr.subString(mindex+href.length(), endquote);
+          String match2 = lq.makewebcanonical(match);
+          //System.out.println("match= " + match + " match2= " + match2);
 
-         if (match2 != null) {
-           if (!visitedList.containsKey(gsb.toGlobalString()) && (searchCnt < maxSearchDepth)) {
-             GlobalString str = global new GlobalString("1");
-             visitedList.put(gsb.toGlobalString(), str);
-             results_list.add(gsb.toGlobalString());
-             searchCnt++;
-             QueryTask gq = global new QueryTask(visitedList, maxDepth, maxSearchDepth, results, results_list, ghostname, gpath, lq.getDepth()+1);
-             enqueueTask(gq);
-           }
-         }
-         index = endquote;
-        } else 
-         break;
-      } else 
-       break;
+          GlobalString ghostname;
+          GlobalString gpath;
+
+          ghostname = global new GlobalString(lq.getHostName(match));
+          gpath = global new GlobalString(lq.getPathName(match));
+
+          GlobalStringBuffer gsb = global new GlobalStringBuffer(ghostname);
+          gsb.append("/");
+          gsb.append(gpath);
+          //System.out.println("match2=" + match2 + lq.getHostName(match)+"/"+lq.getPathName(match));
+
+          if (match2 != null) {
+            if (!visitedList.containsKey(gsb.toGlobalString()) && (searchCnt < maxSearchDepth)) {
+              //System.out.println("I am here");
+              GlobalString str = global new GlobalString("1");
+              visitedList.put(gsb.toGlobalString(), str);
+              //results_list.add(gsb.toGlobalString());
+              searchCnt++;
+              QueryTask gq = global new QueryTask(visitedList, maxDepth, maxSearchDepth, results, results_list, ghostname, gpath, lq.getDepth()+1);
+              enqueueTask(gq);
+            }
+          }
+          index = endquote;
+        } else {
+          //System.out.println("mindex= " + mindex + " index= " + index + " endquote= " + endquote + " href.length()= " + href.length());
+          break;
+        }
+      } else { 
+        //System.out.println("mindex= " + mindex + " index= " + index);
+        break;
+      }
     }
+    //System.out.println("End of processPage");
+    //System.out.println("\n");
   }
 }
index 5a29018643710d3669817a7ba85dca0ab3f23858..88a659e3d8b18d9facc940be8c9dab830d35be21 100644 (file)
@@ -11,7 +11,7 @@ public class Spider {
     int maxDepth = 3;
     int maxSearchDepth = 10;
     int i, j;
-    String fm = "www.uci.edu";
+    String fm = "";
     String fp = "";
     
     if(args.length != 3) {
@@ -19,10 +19,11 @@ public class Spider {
       System.exit(0);
     } else {
       NUM_THREADS = Integer.parseInt(args[0]);
-      fm = args[1];
+      fm = "dc-11.calit2.uci.edu";
       maxDepth = Integer.parseInt(args[2]);
     }
     
+    int nQueue = 3;
     int mid[] = new int[8];
 
     mid[0] = (128<<24)|(195<<16)|(136<<8)|162;
@@ -38,17 +39,17 @@ public class Spider {
     atomic {
       //set up workers
       ts=global new TaskSet(NUM_THREADS);
-      for (i = 0; i < NUM_THREADS; i++) {
-        ts.threads[i] = global new Worker(ts,i,(NUM_THREADS/2));
-      }
-      for (i = 0; i < NUM_THREADS/2; i++) {
+      for (i = 0; i < nQueue; i++) {
         ts.todo[i] = global new GlobalQueue();
       }
+      for (i = 0; i < NUM_THREADS; i++) {
+        ts.threads[i] = global new Worker(ts,i,nQueue);
+      }
     }
 
     atomic {
       GlobalString firstmachine = global new GlobalString(fm);
-      GlobalString firstpage = global new GlobalString("");
+      GlobalString firstpage = global new GlobalString("1.html");
       DistributedHashMap visitedList = global new DistributedHashMap(500, 500, 0.75f);
       DistributedHashMap results = global new DistributedHashMap(100, 100, 0.75f);
       DistributedLinkedList results_list = global new DistributedLinkedList();
index c45c25e866fdf434667f0a620c44c35306a50abc..9d5cdfe61895a1b61135687f91bbd1a7e4dd1e93 100644 (file)
@@ -12,7 +12,6 @@ public class Task {
     w.workingtask=null;
   }
   public void enqueueTask(Task t) {
-    //System.out.println("queueid= " + queueid);
     w.tasks.todo[queueid].push(t);
   }
   public native void execution();
index 1904a89ec89ea7cca2a3effa7f5d964a54c18a0d..27a8da147790244304066e8c5c0059e77022756c 100644 (file)
@@ -2,7 +2,7 @@ public class TaskSet {
   public TaskSet(int nt) {
     numthreads=nt;
     threads=global new Worker[nt];
-    todo=global new GlobalQueue[(nt/2)];
+    todo=global new GlobalQueue[3];
   }
 
   //Tasks to be executed
index 0c8991e447bda10bd59ef218eb150c868dbe13de..b6add81038f45687350b1836cad75c48aaf93e8c 100644 (file)
@@ -7,7 +7,7 @@ public class Worker extends Thread {
   Worker(TaskSet tasks, int id, int numQueue) {
     this.tasks = tasks;
     this.id = id;
-    this.numQueue = 3; // Correct this 3 should be hash defined
+    this.numQueue = numQueue;
   }
   
   public void run() {
@@ -17,9 +17,7 @@ public class Worker extends Thread {
     while(notdone) {
       Task t=null;
       atomic {
-        System.out.println("Transacion 1");
         int qindex = (id%numQueue);
-        //System.out.println("id= " + id + " numQueue= " + numQueue);
         if (!tasks.todo[qindex].isEmpty()) {
           //grab segment from todo list
           t=workingtask=(Task) tasks.todo[qindex].pop();
diff --git a/Robust/src/Benchmarks/Recovery/Spider/recovery/dstm.conf b/Robust/src/Benchmarks/Recovery/Spider/recovery/dstm.conf
deleted file mode 100644 (file)
index cbce2d5..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-128.195.136.162
-128.195.136.163
-128.195.136.164
-128.195.136.165
-128.195.136.166
-128.195.136.167
-128.195.136.168
-128.195.136.169
index c98fd3f03dcbaf19234bffd780ede60002b343e3..358d3a802cb1c00a4d8b29c337b64e2a77766a62 100644 (file)
@@ -3,10 +3,11 @@ SUBCLASS=Query
 SRC1=${MAINCLASS}.java
 SRC2=Global${SUBCLASS}.java
 SRC3=${SUBCLASS}Task.java
-FLAGS=-recoverystats -recovery -transstats -dsmcaching -dsm -dsmtask -optimize -mainclass ${MAINCLASS}
-DSMFLAGS= -dsm -dsmtask -sandbox -transstats -optimize -mainclass ${MAINCLASS}
+FLAGS=-recoverystats -recovery -transstats -dsmcaching -32bit -dsm -dsmtask -optimize -mainclass ${MAINCLASS}
+DSMFLAGS=-dsm -dsmtask -transstats -dsmcaching -debug -optimize -mainclass ${MAINCLASS}
 default:
 #      ../../../../buildscript ${DSMFLAGS} -o ${MAINCLASS}DSM GlobalQuery.java LocalQuery.java QueryTask.java Spider.java Task.java GlobalQueue.java Worker.java TaskSet.java
+#      ../../../../buildscript ${FLAGS} -o ${MAINCLASS} GlobalQuery.java LocalQuery.java QueryTask.java Spider.java Task.java GlobalQueue.java Worker.java TaskSet.java DistributedHashMap.java
        ../../../../buildscript ${FLAGS} -o ${MAINCLASS} GlobalQuery.java LocalQuery.java QueryTask.java Spider.java Task.java GlobalQueue.java Worker.java TaskSet.java
 
 clean: