*** empty log message ***
author hkhang <hkhang>
Mon, 2 Nov 2009 21:48:27 +0000 (21:48 +0000)
committer hkhang <hkhang>
Mon, 2 Nov 2009 21:48:27 +0000 (21:48 +0000)
Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java [new file with mode: 0644]
Robust/src/Benchmarks/Spider/recovery/LocalQuery.java
Robust/src/Benchmarks/Spider/recovery/QueryQueue.java
Robust/src/Benchmarks/Spider/recovery/QueryTask.java [new file with mode: 0644]
Robust/src/Benchmarks/Spider/recovery/Spider.java
Robust/src/Benchmarks/Spider/recovery/dstm.conf
Robust/src/Benchmarks/Spider/recovery/makefile
Robust/src/ClassLibrary/JavaDSM/DistributedHashMap.java
Robust/src/ClassLibrary/JavaDSM/Task.java

diff --git a/Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java b/Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java
new file mode 100644 (file)
index 0000000..0a9d163
--- /dev/null
@@ -0,0 +1,92 @@
+/**
+ * A crawl target: a host name, a path on that host and an integer depth.
+ * Host and path are kept as GlobalString copies created with "global new".
+ * The class also resolves link text found on a page against this query.
+ */
+public class GlobalQuery {
+  GlobalString hostname;
+  GlobalString path;
+  int depth;
+
+  /** Creates a query for the empty path on the given host, at depth 0. */
+  public GlobalQuery(GlobalString hostname) {
+    this.hostname = global new GlobalString(hostname);
+    this.path = global new GlobalString("");
+    this.depth = 0;
+  }
+
+  /** Creates a query from copies of the given host and path, at the given depth. */
+  public GlobalQuery(GlobalString hostname, GlobalString path, int depth) {
+    this.hostname = global new GlobalString(hostname);
+    this.path = global new GlobalString(path);
+    this.depth = depth;
+  }
+
+  /** Returns the depth stored at construction time. */
+  public int getDepth() {
+    return depth;
+  }
+
+  /** Returns this query's host name. */
+  public GlobalString getHostName() {
+    return hostname;
+  }
+
+  /** Returns this query's path. */
+  public GlobalString getPath() {
+    return path;
+  }
+
+  /**
+   * Builds "host/path" for a link, using getHostName(page) and
+   * getPathName(page) for the two halves.
+   */
+  public GlobalString makewebcanonical(GlobalString page) {
+    GlobalStringBuffer b = global new GlobalStringBuffer(getHostName(page));
+    b.append("/");
+    b.append(getPathName(page));
+    return b.toGlobalString();
+  }
+
+  /**
+   * Returns the host part of a link.
+   *
+   * A link containing neither "http://" nor "https://" is treated as
+   * relative and yields this query's own host name. Otherwise the result is
+   * the text between the end of the scheme prefix and the next '/' (or the
+   * end of the link when there is no further '/').
+   */
+  public GlobalString getHostName(GlobalString page) {
+    int beginindex = hostStart(page);
+    if (beginindex == -1) {
+      return getHostName();
+    }
+
+    // The search starts one character past the scheme end, exactly as the
+    // original lookup did.
+    int endindex = page.indexOf('/', beginindex+1);
+    if (endindex == -1) {
+      endindex = page.length();
+    }
+    return page.subString(beginindex, endindex);
+  }
+
+  /**
+   * Returns the path part of a link.
+   *
+   * For a link without "http://" or "https://": if this query's path has no
+   * '/', the link itself is returned; otherwise the link is appended to this
+   * query's path up to and including its last '/'. For a link with a scheme,
+   * the result is everything after the first '/' that follows the host, or
+   * "index.html" when no such '/' exists.
+   */
+  public GlobalString getPathName(GlobalString page) {
+    int beginindex = hostStart(page);
+
+    if (beginindex == -1) {
+      GlobalString path = getPath();
+      int lastindex = path.lastindexOf('/');
+      if (lastindex == -1) {
+        return page;
+      }
+      GlobalStringBuffer sb = global new GlobalStringBuffer(path.subString(0,lastindex+1));
+      sb.append(page);
+      return sb.toGlobalString();
+    }
+
+    int nextindex = page.indexOf('/', beginindex+1);
+    if (nextindex == -1) {
+      return global new GlobalString("index.html");
+    }
+    return page.subString(nextindex+1, page.length());
+  }
+
+  /**
+   * Returns the index just past the "https://" or "http://" prefix found in
+   * page ("https://" is checked first), or -1 when neither occurs.
+   * A non-negative result is always at least the prefix length, which is why
+   * callers need no separate "begin index is -1" error branch.
+   */
+  private int hostStart(GlobalString page) {
+    GlobalString https = global new GlobalString("https://");
+    int at = page.indexOf(https);
+    if (at != -1) {
+      return at + https.length();
+    }
+
+    GlobalString http = global new GlobalString("http://");
+    at = page.indexOf(http);
+    if (at != -1) {
+      return at + http.length();
+    }
+    return -1;
+  }
+}
index 2315b1e537910453360faa3f4aa26ffec443b829..1beeadbe305c51c05b060dff598b6638ca349b47 100644 (file)
@@ -40,23 +40,37 @@ public class LocalQuery {
 
        public String getHostName(String page) {
                String http = new String("http://");
-               if (page.indexOf(http) == -1) {
+               String https = new String("https://");
+               int beginindex;
+               int endindex;
+
+               if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) {
                        return getHostName();
-               } else {
-                       int beginindex = page.indexOf(http) + http.length();
-                       int endindex = page.indexOf('/',beginindex+1);
-                       if ((beginindex == -1)) {
-                               System.printString("ERROR");
-                       }
-                       if (endindex == -1)
-                               endindex=page.length();
-                       return page.subString(beginindex, endindex);
+               } 
+               else if (page.indexOf(https) != -1) {
+                       beginindex = page.indexOf(https) + https.length();
+               }
+               else {
+                       beginindex = page.indexOf(http) + http.length();
                }
+               endindex = page.indexOf('/',beginindex+1);
+
+               if ((beginindex == -1)) {
+                       System.printString("ERROR");
+               }
+               if (endindex == -1)
+                       endindex = page.length();
+
+               return page.subString(beginindex, endindex);
        }
 
        public String getPathName(String page) {
                String http = new String("http://");
-               if (page.indexOf(http) == -1) {
+               String https = new String("https://");
+               int beginindex;
+               int nextindex;
+
+               if ((page.indexOf(http) == -1) && (page.indexOf(https) == -1)) {
                        String path = getPath();
                        int lastindex = path.lastindexOf('/');
                        if (lastindex == -1)
@@ -65,12 +79,17 @@ public class LocalQuery {
                        StringBuffer sb = new StringBuffer(path.subString(0,lastindex+1));
                        sb.append(page);
                        return sb.toString();
-               } else {
-                       int beginindex = page.indexOf(http) + http.length();
-                       int nextindex = page.indexOf('/',beginindex+1);
-                       if ((beginindex==-1) || (nextindex==-1))
-                               return new String("index.html");
-                       return page.subString(nextindex+1, page.length());
                }
+               else if (page.indexOf(https) != -1) {
+                       beginindex = page.indexOf(https) + https.length();
+               }
+               else {
+                       beginindex = page.indexOf(http) + http.length();
+               }
+               nextindex = page.indexOf('/',beginindex+1);
+
+               if ((beginindex==-1) || (nextindex==-1))
+                       return new String("index.html");
+               return page.subString(nextindex+1, page.length());
        }
 }
index da9cc72be3e1b84f8aff16c0104d57b783447fe1..915bb4b9473919851339c713bd85f864015d083d 100644 (file)
@@ -1,7 +1,6 @@
 public class QueryQueue {
        HashSet queries;
        int size;
-  int ddddddddddd;
 
        public QueryQueue() {
                queries = new HashSet();
diff --git a/Robust/src/Benchmarks/Spider/recovery/QueryTask.java b/Robust/src/Benchmarks/Spider/recovery/QueryTask.java
new file mode 100644 (file)
index 0000000..e333984
--- /dev/null
@@ -0,0 +1,257 @@
+public class QueryTask extends Task {
+       int maxDepth;
+       Queue toprocess;
+       DistributedHashMap results;
+       GlobalString workingURL;
+
+  /**
+   * Stores the collaborators this task works with.
+   *
+   * @param todoList queue that pending queries are popped from and pushed to
+   * @param doneList map keyed by "host/path" for pages already handled
+   * @param maxDepth queries whose depth is not below this value are not fetched
+   * @param results  map from title word to a queue of URLs
+   */
+  public QueryTask(Queue todoList, DistributedHashMap doneList, int maxDepth, DistributedHashMap results) {
+    this.results = results;
+    this.maxDepth = maxDepth;
+    this.doneList = doneList;
+    this.todoList = todoList;
+  }
+
+  /**
+   * Processes the current work item (myWork, cast to GlobalQuery).
+   *
+   * When the item's depth is below maxDepth, the page "hostname/path" is
+   * requested over a socket on port 80, the response is read into a
+   * LocalQuery, title words are recorded in results via processList, and the
+   * links found by processPage are stored in toprocess. When the depth is
+   * not below maxDepth nothing is fetched, and workingURL and toprocess keep
+   * whatever values they had before the call.
+   */
+  public void execute() {
+               int depth;
+               int max;
+               
+               // Read the item's depth and this task's limit inside one atomic section.
+               atomic {
+                       depth = ((GlobalQuery)myWork).getDepth();
+      max = this.maxDepth;
+               }
+
+               if (depth < max) {
+                       /* global variables */
+                       GlobalQuery gq;
+
+                       /* local variables */
+                       LocalQuery lq;
+                       String hostname;
+                       String path;
+
+                       // Copy host and path out of the work item into local Strings and
+                       // remember "hostname/path" in workingURL; done() uses it as the
+                       // doneList key.
+                       atomic {
+                               gq = (GlobalQuery)myWork;
+                               hostname = new String(GlobalString.toLocalCharArray(gq.getHostName()));
+                               path = new String(GlobalString.toLocalCharArray(gq.getPath()));
+
+                               GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
+                               gsb.append("/");
+                               gsb.append(path);
+                               workingURL = global new GlobalString(gsb.toGlobalString());
+                       }
+                       lq = new LocalQuery(hostname, path, depth);
+
+                       System.printString(lq.getDepth()+" ");
+                       System.printString("Processing - Hostname : ");
+                       System.printString(hostname);
+                       System.printString(", Path : ");
+                       System.printString(path);
+                       System.printString("\n");
+
+                       // The network I/O below happens outside any atomic section.
+                       Socket s = new Socket(hostname, 80);
+    
+                       requestQuery(hostname, path, s);
+                       readResponse(lq, s);
+
+                       // Record the page's title words under workingURL.
+                       atomic {
+                               processList(lq, workingURL, results);
+                       }
+
+                       // Collect the links on the page; done() drains this queue later.
+                       atomic {
+                               toprocess = processPage(lq);
+                       }
+
+                       s.close();
+               }
+  }
+
+       /**
+        * Marks workingURL as handled and schedules the links collected for it.
+        *
+        * Puts workingURL into doneList with the value "true", then pops every
+        * query from toprocess and pushes it onto todoList unless its
+        * "host/path" key is already present in doneList.
+        *
+        * @param obj not used by this implementation
+        */
+       public void done(Object obj) {
+               doneList.put(workingURL, global new GlobalString("true"));
+
+               while (!toprocess.isEmpty()) {
+                       GlobalQuery next = (GlobalQuery)toprocess.pop();
+
+                       // Build the same "host/path" key form that execute() stores in workingURL.
+                       GlobalString host = global new GlobalString(next.getHostName());
+                       GlobalString file = global new GlobalString(next.getPath());
+                       GlobalStringBuffer keyBuf = global new GlobalStringBuffer(host);
+                       keyBuf.append("/");
+                       keyBuf.append(file);
+
+                       boolean alreadyDone = doneList.containsKey(keyBuf.toGlobalString());
+                       if (!alreadyDone) {
+                               todoList.push(next);
+                       }
+               }
+       }
+
+       /**
+        * Writes an HTTP/1.1 GET request for "/" + path to the socket.
+        *
+        * The request carries "Connection: close": readResponse() keeps reading
+        * until the stream ends, and an HTTP/1.1 server otherwise keeps the
+        * connection open after the response, so the reader would stall until
+        * the server's idle timeout.
+        *
+        * @param hostname value sent in the Host header
+        * @param path     request path without the leading '/'
+        * @param sock     connected socket the request is written to
+        */
+       public static void requestQuery(String hostname, String path, Socket sock) {
+               StringBuffer req = new StringBuffer("GET ");
+               req.append("/");
+               req.append(path);
+               req.append(" HTTP/1.1\r\nHost:");
+               req.append(hostname);
+               req.append("\r\nConnection: close\r\n\r\n");
+               sock.write(req.toString().getBytes());
+       }
+
+       /**
+        * Skips the response headers and appends the rest of the stream to
+        * lq.response.
+        *
+        * Headers are consumed one byte at a time until the blank line
+        * "\r\n\r\n" has been seen. The scanner state is the number of bytes of
+        * that terminator matched so far:
+        *   0 - nothing, 1 - \r, 2 - \r\n, 3 - \r\n\r, 4 - \r\n\r\n
+        * A '\r' arriving where '\n' was expected restarts the match at state 1
+        * instead of 0, so that "\r\r\n\r\n" is still recognised.
+        *
+        * Returns as soon as a read does not deliver data. That covers a
+        * negative result as well as 0, because a negative count must never
+        * reach subString().
+        *
+        * @param lq   query whose response buffer receives the body
+        * @param sock socket to read from
+        */
+       public static void readResponse(LocalQuery lq, Socket sock) {
+               int state = 0;
+               byte[] one = new byte[1];
+
+               while (state < 4) {
+                       if (sock.read(one) != 1)
+                               return;
+
+                       boolean isCR = (one[0] == '\r');
+                       boolean expectCR = (state == 0) || (state == 2);
+
+                       if (expectCR && isCR)
+                               state++;
+                       else if (!expectCR && one[0] == '\n')
+                               state++;
+                       else if (isCR)
+                               state = 1;
+                       else
+                               state = 0;
+               }
+
+               while (true) {
+                       byte[] buffer = new byte[1024];
+                       int numchars = sock.read(buffer);
+                       if (numchars <= 0)
+                               return;
+                       // Only the first numchars bytes of the buffer are valid.
+                       String curr = (new String(buffer)).subString(0, numchars);
+                       lq.response.append(curr);
+               }
+       }
+
+       /**
+        * Indexes the words of the page title.
+        *
+        * Looks for "<title>" ... "</title>" in lq.response, splits the text in
+        * between with tokenize(), and for every word appends url to the Queue
+        * stored under that word in results (creating the Queue when the word is
+        * not yet a key). Does nothing when the page has no "<title>" or the
+        * closing "</title>" is missing; the latter used to pass -1 to
+        * subString().
+        *
+        * @param lq      query holding the fetched response text
+        * @param url     URL recorded for each title word
+        * @param results map from word to Queue of URLs
+        */
+       public static void processList(LocalQuery lq, GlobalString url, DistributedHashMap results) {
+               String sTitle = new String("<title>");
+               String eTitle = new String("</title>");
+               String searchstr = lq.response.toString();
+
+               int sIndex = searchstr.indexOf(sTitle);
+               if (sIndex == -1)
+                       return;
+
+               int titleStart = sIndex + sTitle.length();
+               int eIndex = searchstr.indexOf(eTitle, titleStart);
+               if (eIndex == -1)
+                       return;
+
+               String title = new String(searchstr.subString(titleStart, eIndex));
+               LinkedList ll = tokenize(title);
+
+               while (!ll.isEmpty()) {
+                       GlobalString word = global new GlobalString(ll.pop().toString());
+                       Queue q;
+
+                       if (!results.containsKey(word)) {
+                               q = global new Queue();
+                       }
+                       else {
+                               q = (Queue)(results.get(word));
+                       }
+                       q.push(url);
+                       results.put(word, q);
+
+                       System.out.println("Key : ["+word.toLocalString()+"],["+q.size()+"]");
+               }
+       }
+
+       /**
+        * Splits str on single space characters and returns the pieces in order.
+        *
+        * Empty pieces (from leading, trailing or repeated spaces, or an empty
+        * input) are dropped so that the empty string is never returned as a
+        * word.
+        *
+        * @param str text to split
+        * @return list of non-empty tokens; empty when str has none
+        */
+       public static LinkedList tokenize(String str) {
+               LinkedList ll = new LinkedList();
+               int sIndex = 0;
+
+               // Candidate stop words / delimiters noted by the original author
+               // (not implemented): and, or, of, at, but, '.', ',', ':' ';', '"', ' ', '-', '='
+               while (true) {
+                       int eIndex = str.indexOf(' ', sIndex);
+                       String token;
+
+                       if (eIndex == -1) {
+                               token = str.subString(sIndex);
+                       }
+                       else {
+                               token = str.subString(sIndex, eIndex);
+                       }
+
+                       if (token.length() > 0) {
+                               ll.add(token);
+                       }
+
+                       if (eIndex == -1) {
+                               break;
+                       }
+                       sIndex = eIndex+1;
+               }
+
+               return ll;
+       }
+       
+  /**
+   * Extracts the targets of all href="..." attributes in lq.response.
+   *
+   * Each target is resolved against lq with getHostName/getPathName and
+   * queued as a GlobalQuery whose depth is lq.getDepth() + 1. Scanning stops
+   * at the first href without a closing quote or when no further href is
+   * found.
+   *
+   * @param lq query holding the fetched response text
+   * @return newly created Queue of GlobalQuery objects (possibly empty)
+   */
+  public static Queue processPage(LocalQuery lq) {
+    int cursor = 0;
+    String href = new String("href=\"");
+    String searchstr = lq.response.toString();
+    int depth = lq.getDepth() + 1;
+    Queue found = global new Queue();
+
+    while (true) {
+      int mindex = searchstr.indexOf(href, cursor);
+      if (mindex == -1) {
+        break;
+      }
+
+      int linkStart = mindex + href.length();
+      int endquote = searchstr.indexOf('"', linkStart);
+      if (endquote == -1) {
+        break;
+      }
+
+      String match = searchstr.subString(linkStart, endquote);
+      String match2 = lq.makewebcanonical(match);
+
+      GlobalString ghostname = global new GlobalString(lq.getHostName(match));
+      GlobalString gpath = global new GlobalString(lq.getPathName(match));
+
+      if (match2 != null) {
+        GlobalQuery gq = global new GlobalQuery(ghostname, gpath, depth);
+        found.push(gq);
+      }
+
+      // Continue scanning from the closing quote of this attribute.
+      cursor = endquote;
+    }
+    return found;
+  }
+}
index d69179ece4025cd455c890ce9b6eed731204c6c9..9335ef33c78b1d14dee1a605cac18e6efd0aabc5 100644 (file)
@@ -1,46 +1,42 @@
 public class Spider {
        public static void main(String[] args) {
-               int NUM_THREADS = 4;
-               int maxDepth = 5;
-               int searchDepth = 10;
+               int NUM_THREADS = 3;
+               int maxDepth = 3;
                int i, j;
                Work[] works;
-               QueryThread[] qt;
-               Query[] currentWorkList;
+               QueryTask[] qt;
+               GlobalQuery[] currentWorkList;
 
                NUM_THREADS = Integer.parseInt(args[0]);
+
+               if (args.length == 3) {
+                       maxDepth = Integer.parseInt(args[2]);
+               }
+
                GlobalString firstmachine;
-               GlobalString firstpage;
 
-//             int[] mid = getMID(NUM_THREADS);
                int mid[] = new int[NUM_THREADS];
-/*             mid[0] = (128<<24)|(195<<16)|(180<<8)|21;        //dc-4
-               mid[1] = (128<<24)|(195<<16)|(180<<8)|24;        //dc-5
-               mid[2] = (128<<24)|(195<<16)|(180<<8)|26;        //dc-6
-    */
-               mid[0] = (128<<24)|(195<<16)|(136<<8)|162;       //dc-1
-               mid[1] = (128<<24)|(195<<16)|(136<<8)|163;       //dc-2
-               mid[2] = (128<<24)|(195<<16)|(136<<8)|164;       //dc-3
-               mid[3] = (128<<24)|(195<<16)|(136<<8)|165;       //dc-3
-               mid[4] = (128<<24)|(195<<16)|(136<<8)|166;       //dc-3
-               mid[5] = (128<<24)|(195<<16)|(136<<8)|167;       //dc-3
+               mid[0] = (128<<24)|(195<<16)|(180<<8)|21;        
+               mid[1] = (128<<24)|(195<<16)|(180<<8)|24;        
+               mid[2] = (128<<24)|(195<<16)|(180<<8)|26;        
 
                atomic {
                        firstmachine = global new GlobalString(args[1]);
-                       firstpage = global new GlobalString(args[2]);
 
                        works = global new Work[NUM_THREADS];
-                       qt = global new QueryThread[NUM_THREADS];
-                       currentWorkList = global new Query[NUM_THREADS];
+                       qt = global new QueryTask[NUM_THREADS];
+                       currentWorkList = global new GlobalQuery[NUM_THREADS];
                        
-                       Query firstquery = global new Query(firstmachine, firstpage, 0);
+                       GlobalQuery firstquery = global new GlobalQuery(firstmachine);
 
                        Queue todoList = global new Queue();
-                       Queue doneList = global new Queue();
+                       DistributedHashMap doneList = global new DistributedHashMap(500, 500, 0.75f);
+                       DistributedHashMap results = global new DistributedHashMap(100, 100, 0.75f);
+                       
                        todoList.push(firstquery);
 
                        for (i = 0; i < NUM_THREADS; i++) {
-                               qt[i] = global new QueryThread(todoList, doneList, maxDepth, searchDepth);
+                               qt[i] = global new QueryTask(todoList, doneList, maxDepth, results);
                                works[i] = global new Work(qt[i], NUM_THREADS, i, currentWorkList);
                        }
                }
@@ -61,44 +57,4 @@ public class Spider {
                        tmp.join();
                }
        }
-
-       public static int[] getMID (int num_threads) {
-               int[] mid = new int[num_threads];
-
-               FileInputStream ifs = new FileInputStream("dstm.conf");
-               String str;
-               String sub;
-               int fromIndex;
-               int endIndex;
-               double num;
-
-               for (int i = 0; i < num_threads; i++) { 
-                       int power = 3 - i;
-                       fromIndex = 0;
-                       num = 0;
-
-                       str = ifs.readLine();
-
-                       endIndex = str.indexOf('.', fromIndex);
-                       sub = str.subString(fromIndex, endIndex);
-                       num += (Integer.parseInt(sub) << 24);
-
-                       fromIndex = endIndex + 1;
-                       endIndex = str.indexOf('.', fromIndex);
-                       sub = str.subString(fromIndex, endIndex);
-                       num += (Integer.parseInt(sub) << 16);
-
-                       fromIndex = endIndex + 1;
-                       endIndex = str.indexOf('.', fromIndex);
-                       sub = str.subString(fromIndex, endIndex);
-                       num += (Integer.parseInt(sub) << 8);
-
-                       fromIndex = endIndex + 1;
-                       sub = str.subString(fromIndex);
-                       num += Integer.parseInt(sub);
-
-                       mid[i] = (int)num;
-               }
-               return mid;
-       }
 }
index 1ecc1322080b2ed2cd7d7895048bee52b28a12f1..935ef319989f4ce0e206e3baba3058fa719508a7 100644 (file)
@@ -1,9 +1,3 @@
-#128.195.180.21
-#128.195.180.24
-#128.195.180.26
-128.195.136.162
-128.195.136.163
-128.195.136.164
-128.195.136.165
-128.195.136.166
-128.195.136.167
+128.195.180.21
+128.195.180.24
+128.195.180.26
index c9ab7801996314eb33f9a489aaa98581de8aeeec..841242a08f365863fb5d4663cbec2c1f4d73862e 100644 (file)
@@ -1,18 +1,13 @@
 MAINCLASS=Spider
 SUBCLASS=Query
 SRC1=${MAINCLASS}.java
-SRC2=${SUBCLASS}.java
+SRC2=Global${SUBCLASS}.java
 SRC3=${SUBCLASS}Queue.java
 SRC4=${SUBCLASS}Task.java
-FLAGS= -dsm -dsmtask -32bit -nooptimize -debug -recovery -mainclass ${MAINCLASS}
+FLAGS= -recovery -dsmtask -dsm -dsmtask -32bit -nooptimize -debug -mainclass ${MAINCLASS}
 default:
        ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC4} ${SRC1}
 
 clean:
        rm -rf tmpbuilddirectory
        rm *.bin
-       rm *.php
-       rm *.css
-       rm www*
-       rm eee*
-       rm web*
index 76d16fb4139da705a16d74a8d51318f810914886..b81ca7774db5456e7a9c8ba0e322261109821e17 100644 (file)
@@ -38,11 +38,11 @@ public class DistributedHashMap {
     for(int i=0; i<oldtable.length; i++) {
       DHashEntry e=oldtable[i];
       while(e!=null) {
-       DHashEntry next=e.next;
-       int bin=hash2(e.hashval, table.length, newCapacity);
-       e.next=newtable[bin];
-       newtable[bin]=e;
-       e=next;
+                               DHashEntry next=e.next;
+                               int bin=hash2(e.hashval, table.length, newCapacity);
+                               e.next=newtable[bin];
+                               newtable[bin]=e;
+                               e=next;
       }
     }
   }
@@ -58,19 +58,19 @@ public class DistributedHashMap {
 
     if (ptr!=null) {
       if (ptr.hashval==hashcode&&ptr.key.equals(key)) {
-       dhe.array[index2]=ptr.next;
-       dhe.count--;
-       return ptr.value;
+                               dhe.array[index2]=ptr.next;
+                               dhe.count--;
+                               return ptr.value;
       }
       while(ptr.next!=null) {
-       if (ptr.hashval==hashcode&&ptr.next.key.equals(key)) {
-         Object oldvalue=ptr.value;
-         ptr.next=ptr.next.next;
-         dhe.count--;
-         return oldvalue;
-       }
-       ptr=ptr.next;
-      }
+                               if (ptr.hashval==hashcode&&ptr.next.key.equals(key)) {
+                                       Object oldvalue=ptr.value;
+                                       ptr.next=ptr.next.next;
+                                       dhe.count--;
+                                       return oldvalue;
+                               }
+                               ptr=ptr.next;
+                       }
     }
     return null;
   }
@@ -88,7 +88,7 @@ public class DistributedHashMap {
     while(ptr!=null) {
       if (ptr.hashval==hashcode
           &&ptr.key.equals(key)) {
-       return ptr.value;
+                               return ptr.value;
       }
       ptr=ptr.next;
     }
@@ -107,7 +107,7 @@ public class DistributedHashMap {
     while(ptr!=null) {
       if (ptr.hashval==hashcode
           &&ptr.key.equals(key)) {
-       return true;
+                               return true;
       }
       ptr=ptr.next;
     }
@@ -127,9 +127,9 @@ public class DistributedHashMap {
 
     while(ptr!=null) {
       if (ptr.hashval==hashcode&&ptr.key.equals(key)) {
-       Object oldvalue=ptr.value;
-       ptr.value=value;
-       return oldvalue;
+                               Object oldvalue=ptr.value;
+                               ptr.value=value;
+                               return oldvalue;
       }
       ptr=ptr.next;
     }
@@ -148,6 +148,10 @@ public class DistributedHashMap {
     }
     return null;
   }
+       
+       /**
+        * Returns table.length, the length of the table array.
+        * NOTE(review): this looks like the table's capacity rather than the
+        * number of key/value pairs currently stored -- confirm that callers
+        * expect that meaning.
+        */
+       public int size() {
+               return table.length;
+       }
 }
 
 
index 355cc4cca5470a3c3f499b424ba4323e5497c0f2..a6b7f8bea19c2ab31f600eb9a8a2383d30e40f04 100644 (file)
@@ -1,6 +1,6 @@
 public class Task {
   Queue todoList;
-       Queue doneList;
+       DistributedHashMap doneList;
   Object myWork;
 
        Task() {}
@@ -25,6 +25,7 @@ public class Task {
                atomic {
                        o = todoList.pop();
                }
+//             System.out.println("Size of TodoList : " + todoList.size());
                return o;
        }