*** empty log message ***
author hkhang <hkhang>
Tue, 3 Nov 2009 02:28:15 +0000 (02:28 +0000)
committer hkhang <hkhang>
Tue, 3 Nov 2009 02:28:15 +0000 (02:28 +0000)
Robust/src/Benchmarks/Spider/recovery/Query.java [deleted file]
Robust/src/Benchmarks/Spider/recovery/QueryList.java [deleted file]
Robust/src/Benchmarks/Spider/recovery/QueryQueue.java [deleted file]
Robust/src/Benchmarks/Spider/recovery/QueryTask.java
Robust/src/Benchmarks/Spider/recovery/QueryThread.java [deleted file]
Robust/src/Benchmarks/Spider/recovery/Spider.java
Robust/src/Benchmarks/Spider/recovery/dstm.conf
Robust/src/Benchmarks/Spider/recovery/makefile

diff --git a/Robust/src/Benchmarks/Spider/recovery/Query.java b/Robust/src/Benchmarks/Spider/recovery/Query.java
deleted file mode 100644 (file)
index 7812fff..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-public class Query {
-  GlobalString hostname;
-  GlobalString path;
-       int depth;
-  
-  public Query(GlobalString hostname, GlobalString path, int depth) {
-    this.hostname = global new GlobalString(hostname);
-    this.path = global new GlobalString(path);
-               this.depth = depth;
-  }
-
-       public int getDepth() {
-               return depth;
-       }
-       
-  public GlobalString getHostName() {
-    return hostname;
-  }
-  public GlobalString getPath() {
-    return path;
-  }
-
-  public GlobalString getHostName(GlobalString page) {
-    GlobalString http = global new GlobalString("http://");
-    if (page.indexOf(http) == -1) {
-      return getHostName();
-    } else {
-      int beginindex = page.indexOf(http) + http.length();
-           int endindex = page.indexOf('/',beginindex+1);
-           if ((beginindex == -1)) {
-        System.printString("ERROR");
-           }
-           if (endindex == -1)
-        endindex = page.length();
-      return page.subString(beginindex, endindex);
-    }
-  }
-
-  
-       public GlobalString getPathName(GlobalString page) {
-    GlobalString http = global new GlobalString("http://");
-    if (page.indexOf(http) == -1) {
-      GlobalString path = getPath();
-           int lastindex = path.lastindexOf('/');
-           if (lastindex == -1)
-        return page;
-           
-      GlobalStringBuffer sb = global new GlobalStringBuffer(path.subString(0,lastindex+1));
-           sb.append(page);
-      return sb.toGlobalString();
-    } else {
-      int beginindex = page.indexOf(http)+http.length();
-           int nextindex = page.indexOf('/',beginindex+1);
-           if ((beginindex == -1) || (nextindex == -1))
-        return global new GlobalString("index.html");
-      return page.subString(nextindex+1, page.length());
-    }
-  }
-}
diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryList.java b/Robust/src/Benchmarks/Spider/recovery/QueryList.java
deleted file mode 100644 (file)
index d09167b..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-public class QueryList extends Queue {
-       Queue queries;
-
-  public QueryList() {
-               queries = global new Queue();
-  }
-
-  public boolean checkQuery(GlobalString x) {
-               boolean set = false;;
-               for (int i = 0 ; i < size; i++) {
-                       if (x.equals((GlobalString)elements[i])) {
-                               set = true;
-                               break;
-                       }
-               }
-               return set;
-  }
-
-       public void addQuery(GlobalString x) {
-               queries.push(x);
-       }
-}
diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryQueue.java b/Robust/src/Benchmarks/Spider/recovery/QueryQueue.java
deleted file mode 100644 (file)
index 915bb4b..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-public class QueryQueue {
-       HashSet queries;
-       int size;
-
-       public QueryQueue() {
-               queries = new HashSet();
-               size = 0;
-       }
-
-       public LocalQuery pop() {
-               if (queries.isEmpty())
-                       return null;
-               LocalQuery q = (LocalQuery) queries.iterator().next();
-               queries.remove(q);
-               size--;
-               return q;
-       }
-
-       public void push(LocalQuery x) {
-               queries.add(x);
-               size++;
-       }
-       
-       public int size() {
-               return size;
-       }
-
-       public boolean isEmpty() {
-               if (size == 0)
-                       return true;
-               else 
-                       return false;
-       }
-}
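diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryTask.java b/Robust/src/Benchmarks/Spider/recovery/QueryTask.java
index e3339846f5c115fd13728b9473965ccda2c01823..fa90e0fc2abcd476e7a334ea5a04f23a21202780 100644 (file)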
@@ -2,6 +2,7 @@ public class QueryTask extends Task {
        int maxDepth;
        Queue toprocess;
        DistributedHashMap results;
+       GlobalString gTitle;
        GlobalString workingURL;
 
   public QueryTask(Queue todoList, DistributedHashMap doneList, int maxDepth, DistributedHashMap results) {
@@ -28,6 +29,7 @@ public class QueryTask extends Task {
                        LocalQuery lq;
                        String hostname;
                        String path;
+                       String title;
 
                        atomic {
                                gq = (GlobalQuery)myWork;
@@ -38,10 +40,11 @@ public class QueryTask extends Task {
                                gsb.append("/");
                                gsb.append(path);
                                workingURL = global new GlobalString(gsb.toGlobalString());
+                               gTitle = null;
                        }
                        lq = new LocalQuery(hostname, path, depth);
 
-                       System.printString(lq.getDepth()+" ");
+                       System.printString("["+lq.getDepth()+"] ");
                        System.printString("Processing - Hostname : ");
                        System.printString(hostname);
                        System.printString(", Path : ");
@@ -53,8 +56,10 @@ public class QueryTask extends Task {
                        requestQuery(hostname, path, s);
                        readResponse(lq, s);
 
-                       atomic {
-                               processList(lq, workingURL, results);
+                       if ((title = grabTitle(lq)) != null) {
+                               atomic {
+                                       gTitle = global new GlobalString(title);
+                               }
                        }
 
                        atomic {
@@ -66,7 +71,11 @@ public class QueryTask extends Task {
   }
 
        public void done(Object obj) {
+               if (gTitle != null) 
+                       processList();
+
                GlobalString str = global new GlobalString("true");
+
                doneList.put(workingURL, str);
 
                while(!toprocess.isEmpty()) {
@@ -85,6 +94,21 @@ public class QueryTask extends Task {
                }
        }
 
+       public static String grabTitle(LocalQuery lq) {
+               String sTitle = new String("<title>");  
+               String eTitle = new String("</title>");
+       String searchstr = lq.response.toString();
+               String title = null;
+
+               int mindex = searchstr.indexOf(sTitle);
+               if (mindex != -1) {
+                       int endquote = searchstr.indexOf(eTitle, mindex+sTitle.length());
+                       title = new String(searchstr.subString(mindex+sTitle.length(), endquote));
+               }
+
+               return title;
+       }
+
        public static void requestQuery(String hostname, String path, Socket sock) {
     StringBuffer req = new StringBuffer("GET "); 
     req.append("/");
@@ -154,68 +178,78 @@ public class QueryTask extends Task {
     }
   }
 
-       public static void processList(LocalQuery lq, GlobalString url, DistributedHashMap results) {
-               String sTitle = new String("<title>");  
-               String eTitle = new String("</title>");
-               String searchstr = lq.response.toString();
+       public void processList() {
                LinkedList ll;
+               GlobalString token = null;
+               int mindex = 0;
+               int endquote = 0;
 
-               int sIndex = searchstr.indexOf(sTitle);
-               if (sIndex != -1) {
-                       int eIndex = searchstr.indexOf(eTitle, sIndex+sTitle.length());
-                       String title = new String(searchstr.subString(sIndex+sTitle.length(), eIndex));
-                       ll = tokenize(title);
-
-                       Queue q;
-                       while (!ll.isEmpty()) {
-                               GlobalString word = global new GlobalString(ll.pop().toString());
-//                             q = (Queue)(results.get(word));
+               while (endquote != -1) {
+                       endquote = gTitle.indexOf(' ', mindex);
 
-//                             if (q == null) {
-                               if (!results.containsKey(word)) {
-                                       q = global new Queue();
-                               }
-                               else {
-                                       q = (Queue)(results.get(word));
+                       if (endquote != -1) {
+                               token = gTitle.subString(mindex, endquote);
+                               mindex = endquote + 1;
+                               if (censor(token)) {
+                                       continue;
                                }
-                               q.push(url);
-                               results.put(word, q);
+                               token = refinement(token);
+                       }
+                       else {
+                               token = gTitle.subString(mindex);
+                               token = refinement(token);
+                       }
 
-                               System.out.println("Key : ["+word.toLocalString()+"],["+q.size()+"]");
 /*
-                               for (int i = 0; i < q.size(); i++) {
-                                       Object obj = q.elements[i];
-                                       GlobalString str = global new GlobalString((GlobalString)obj);
-                                       System.out.println("\t["+i+"] : "+str.toLocalString());
-                               }*/
+                       Queue q;
+                       if ((q = (Queue)(results.remove(token))) == null) {
+                               q = global new Queue();
                        }
+                       else {
+                               q = (Queue)(results.get(token));
+                       }
+                       // bug here <- object id changed?? 
+                       q.push(workingURL);     
+                       results.put(token, q);
+                       
+                       System.out.println("Key : ["+token.toLocalString()+"],["+q.size()+"]");
+                       */
                }
        }
 
-       public static LinkedList tokenize(String str) {
-               LinkedList ll;
-               int sIndex = 0;
-               int eIndex = 0;
-               String token;
+       public boolean censor(GlobalString str) {
+               if (str.equals("of"))   return true;
+               else if (str.equals("for")) return true;
+               else if (str.equals("a")) return true;
+               else if (str.equals("an")) return true;
+               else if (str.equals("the")) return true;
+               else if (str.equals("at")) return true;
+               else if (str.equals("and")) return true;
+               else if (str.equals("or")) return true;
+               else if (str.equals("but")) return true;
+               else if (str.equals(".")) return true;
+               else if (str.equals("=")) return true;
+               else if (str.equals("-")) return true;
+               else if (str.equals(":")) return true;
+               else if (str.equals(";")) return true;
+               else if (str.equals("\'")) return true;
+               else if (str.equals("\"")) return true;
+               else if (str.equals("@")) return true;
+               else return false;
+       }
 
-               ll = new LinkedList();
-               
-               // and, or, of, at, but, '.', ',', ':' ';', '"', ' ', '-', '='
-               while (true) {
-                       eIndex = str.indexOf(' ', sIndex);
-                       if (eIndex == -1) {
-                               token = str.subString(sIndex);
-                               ll.add(token);
-                               break;
-                       }
-                       else {
-                               token = str.subString(sIndex, eIndex);
-                               ll.add(token);
-                               sIndex = eIndex+1;
-                       }
+       public GlobalString refinement(GlobalString str) {
+               if (str.charAt(str.length()-1) == ',') {
+                       return str.subString(0, str.length()-1);
                }
-               
-               return ll;
+               else if (str.charAt(str.length()-1) == ':') {
+                       return str.subString(0, str.length()-1);
+               }
+               else if (str.charAt(str.length()-1) == 's') {
+                       if (str.charAt(str.length()-2) == '\'')
+                               return str.subString(0, str.length()-2);        
+               }
+               return str;
        }
        
   public static Queue processPage(LocalQuery lq) {
@@ -229,7 +263,6 @@ public class QueryTask extends Task {
                depth = lq.getDepth() + 1;
 
                toprocess = global new Queue();
-
                while(cont) {
                        int mindex = searchstr.indexOf(href,index);
                        if (mindex != -1) {     
diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryThread.java b/Robust/src/Benchmarks/Spider/recovery/QueryThread.java
deleted file mode 100644 (file)
index 7d6e353..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-public class QueryThread extends Task {
-       int maxDepth;
-       int maxSearchDepth;
-
-  public QueryThread(Queue todoList, Queue doneList, int maxDepth, int maxSearchDepth) {
-    this.todoList = todoList;
-               this.doneList = doneList;
-               this.maxDepth = maxDepth;
-               this.maxSearchDepth = maxSearchDepth;
-  }
-
-  public void execute() {
-               int depth;
-    int max;
-    int maxSearch;
-               
-               atomic {
-                       depth = ((Query)myWork).getDepth();
-      max = this.maxDepth;
-      maxSearch = this.maxSearchDepth;
-               }
-
-               if (depth < max) {
-                       /* global variables */
-                       Query q;
-                       GlobalString ghostname;
-                       GlobalString gpath;
-
-                       /* local variables */
-                       QueryQueue toprocess;
-                       LocalQuery lq;
-                       String hostname;
-                       String path;
-
-                       atomic {
-                               q = (Query)myWork;
-                               ghostname = q.getHostName();
-                               gpath = q.getPath();
-                               hostname = new String(GlobalString.toLocalCharArray(ghostname));
-                               path = new String(GlobalString.toLocalCharArray(gpath));
-                       }
-                       lq = new LocalQuery(hostname, path, depth);
-
-                       System.printString("Processing - Hostname : ");
-                       System.printString(hostname);
-                       System.printString(", Path : ");
-                       System.printString(path);
-                       System.printString("\n");
-
-                       Socket s = new Socket(hostname, 80);
-    
-                       requestQuery(hostname, path, s);
-                       readResponse(lq, s);
-                       toprocess = processPage(lq,maxSearch);
-                       s.close();
-
-                       atomic {
-                               while(!toprocess.isEmpty()) {
-                                       lq = toprocess.pop();
-                                       ghostname = global new GlobalString(lq.getHostName());
-                                       gpath = global new GlobalString(lq.getPath());
-
-                                       q = global new Query(ghostname, gpath, lq.getDepth());
-                                       todoList.push(q);
-                               }
-                       }
-               }
-  }
-       
-       public static void requestQuery(String hostname, String path, Socket sock) {
-    StringBuffer req = new StringBuffer("GET "); 
-    req.append("/");
-               req.append(path);
-    req.append(" HTTP/1.1\r\nHost:");
-    req.append(hostname);
-    req.append("\r\n\r\n");
-    sock.write(req.toString().getBytes());
-  }
-
-       public static void readResponse(LocalQuery lq, Socket sock) {
-       //    state 0 - nothing
-       //    state 1 - \r
-       //    state 2 - \r\n
-       //    state 3 - \r\n\r
-       //    state 4 - \r\n\r\n
-    int state=0;
-    while(true) {
-      if (state<4) {
-        if (state==0) {
-          byte[] b=new byte[1];
-          int numchars=sock.read(b);
-          if ((numchars==1)) {
-            if (b[0]=='\r') {
-              state++;
-            }
-          } else
-                                               return;
-        } else if (state==1) {
-          byte[] b=new byte[1];
-          int numchars=sock.read(b);
-          if (numchars==1) {
-            if (b[0]=='\n')
-              state++;
-            else
-              state=0;
-          } else return;
-        } else if (state==2) {
-          byte[] b=new byte[1];
-          int numchars=sock.read(b);
-          if (numchars==1) {
-            if (b[0]=='\r')
-              state++;
-            else
-              state=0;
-          } else return;
-        } else if (state==3) {
-          byte[] b=new byte[1];
-          int numchars=sock.read(b);
-          if (numchars==1) {
-            if (b[0]=='\n')
-              state++;
-            else
-              state=0;
-          } else return;
-        }
-      } else {
-                               byte[] buffer=new byte[1024];
-        int numchars=sock.read(buffer);
-        if (numchars==0)
-          return;
-        else {
-          String curr=(new String(buffer)).subString(0,numchars);
-                                       lq.response.append(curr);
-        }
-      }
-    }
-  }
-       
-       public void done(Object obj) {
-               doneList.push(obj);
-       }
-
-  public static QueryQueue processPage(LocalQuery lq,int maxSearchDepth) {
-    int index = 0;
-       String href = new String("href=\"");
-       String searchstr = lq.response.toString();
-               int depth;
-       boolean cont = true;
-
-               QueryQueue toprocess = new QueryQueue();
-               depth = lq.getDepth() + 1;
-
-               int searchDepthCnt = 0;
-               while(cont && (searchDepthCnt < maxSearchDepth)) {
-                       int mindex = searchstr.indexOf(href,index);
-                       if (mindex != -1) {     
-                               int endquote = searchstr.indexOf('"', mindex+href.length());
-               if (endquote != -1) {
-                     String match = searchstr.subString(mindex+href.length(), endquote);
-                                       String match2 = lq.makewebcanonical(match);
-       
-                     if (match2 != null) {
-                                               LocalQuery newlq = new LocalQuery(lq.getHostName(match), lq.getPathName(match), depth);
-
-                                               toprocess.push(newlq);
-                                               searchDepthCnt++;
-                                       }
-                                       index = endquote;
-        } else cont = false;
-      } else cont = false;
-    }
-
-               return toprocess;
-  }
-}
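diff --git a/Robust/src/Benchmarks/Spider/recovery/Spider.java b/Robust/src/Benchmarks/Spider/recovery/Spider.java
index 9335ef33c78b1d14dee1a605cac18e6efd0aabc5..356d6fa873d66ae98250793e9dcc221ec010a17a 100644 (file)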
@@ -16,9 +16,12 @@ public class Spider {
                GlobalString firstmachine;
 
                int mid[] = new int[NUM_THREADS];
-               mid[0] = (128<<24)|(195<<16)|(180<<8)|21;        
-               mid[1] = (128<<24)|(195<<16)|(180<<8)|24;        
-               mid[2] = (128<<24)|(195<<16)|(180<<8)|26;        
+//             mid[0] = (128<<24)|(195<<16)|(180<<8)|21;        
+//             mid[1] = (128<<24)|(195<<16)|(180<<8)|24;        
+//             mid[2] = (128<<24)|(195<<16)|(180<<8)|26;        
+               mid[0] = (128<<24)|(195<<16)|(136<<8)|162;       
+               mid[1] = (128<<24)|(195<<16)|(136<<8)|163;       
+               mid[2] = (128<<24)|(195<<16)|(136<<8)|164;       
 
                atomic {
                        firstmachine = global new GlobalString(args[1]);
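diff --git a/Robust/src/Benchmarks/Spider/recovery/dstm.conf b/Robust/src/Benchmarks/Spider/recovery/dstm.conf
index 935ef319989f4ce0e206e3baba3058fa719508a7..eff447b84ee46f8283d2f55e0e45f4c543ef1333 100644 (file)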
@@ -1,3 +1,6 @@
-128.195.180.21
-128.195.180.24
-128.195.180.26
+#128.195.180.21
+#128.195.180.24
+#128.195.180.26
+128.195.136.162
+128.195.136.163
+128.195.136.164
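diff --git a/Robust/src/Benchmarks/Spider/recovery/makefile b/Robust/src/Benchmarks/Spider/recovery/makefile
index 841242a08f365863fb5d4663cbec2c1f4d73862e..eafdf663180655b578b1cc11610c109000cd4b6e 100644 (file)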
@@ -2,11 +2,10 @@ MAINCLASS=Spider
 SUBCLASS=Query
 SRC1=${MAINCLASS}.java
 SRC2=Global${SUBCLASS}.java
-SRC3=${SUBCLASS}Queue.java
-SRC4=${SUBCLASS}Task.java
+SRC3=${SUBCLASS}Task.java
 FLAGS= -recovery -dsmtask -dsm -dsmtask -32bit -nooptimize -debug -mainclass ${MAINCLASS}
 default:
-       ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC4} ${SRC1}
+       ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC1}
 
 clean:
        rm -rf tmpbuilddirectory