*** empty log message ***
author hkhang <hkhang>
Mon, 9 Nov 2009 01:15:13 +0000 (01:15 +0000)
committer hkhang <hkhang>
Mon, 9 Nov 2009 01:15:13 +0000 (01:15 +0000)
Robust/src/Benchmarks/Spider/recovery/GlobalQuery.java
Robust/src/Benchmarks/Spider/recovery/QueryTask.java
Robust/src/Benchmarks/Spider/recovery/Spider.java
Robust/src/ClassLibrary/InetAddress.java
Robust/src/ClassLibrary/Java/Socket.java
Robust/src/Runtime/socket.c

index 0a9d1630d524ae0fe1e632ef2ec1992c358f5d76..7efff695e90dcd45efd5d6717d7fa50c8b4663aa 100644 (file)
@@ -4,7 +4,7 @@ public class GlobalQuery {
        int depth;
   
        public GlobalQuery(GlobalString hostname) {
-               this.hostname = global new GlobalString(hostname);
+               this.hostname = hostname;
                this.path = global new GlobalString("");
                this.depth = 0;
        }
index 9c0b5a11183ed728bacd53a017b8c3956012b97f..743d63e01be6b0d7c7c73a0aeeb1fbe0c18e6178 100644 (file)
@@ -10,6 +10,7 @@ public class QueryTask extends Task {
                this.doneList = doneList;
                this.maxDepth = maxDepth;
                this.results = results;
+               toprocess = global new Queue();
   }
 
   public void execute() {
@@ -51,8 +52,16 @@ public class QueryTask extends Task {
                        System.printString(path);
                        System.printString("\n");
 
-                       Socket s = new Socket(hostname, 80);
-    
+                       if (isDocument(path)) {
+                               return;
+                       }
+
+                       Socket s = new Socket();
+
+                       if(s.connect(hostname, 80) == -1) {
+                               return;
+                       }
+
                        requestQuery(hostname, path, s);
                        readResponse(lq, s);
 
@@ -60,15 +69,27 @@ public class QueryTask extends Task {
                                atomic {
                                        gTitle = global new GlobalString(title);
                                }
+                               atomic {
+                                       toprocess = processPage(lq);
+                               }
                        }
-
-                       atomic {
-                               toprocess = processPage(lq);
-                       }
-
                        s.close();
                }
   }
+       
+       public static boolean isDocument(String str) {
+               int index = str.lastindexOf('.');
+
+               if (index != -1) {
+                       if ((str.subString(index+1)).equals("pdf")) return true;
+                       else if ((str.subString(index+1)).equals("ps")) return true;
+                       else if ((str.subString(index+1)).equals("ppt")) return true;
+                       else if ((str.subString(index+1)).equals("pptx")) return true;
+                       else if ((str.subString(index+1)).equals("jpg")) return true;
+                       else return false;
+               }
+               return false;
+       }
 
        public void done(Object obj) {
                if (gTitle != null) 
@@ -95,22 +116,46 @@ public class QueryTask extends Task {
        }
 
        public static String grabTitle(LocalQuery lq) {
-               String sTitle = new String("<title>");  
-               String eTitle = new String("</title>");
+               String sBrace = new String("<");        
+               String strTitle = new String("title>");
        String searchstr = lq.response.toString();
                String title = null;
                char ch;
 
-               int mindex = searchstr.indexOf(sTitle);
-               if (mindex != -1) {
-                       int endquote = searchstr.indexOf(eTitle, mindex+sTitle.length());
+               int mindex = -1;
+               int endquote = -1;
+               int i, j;
+               String tmp;
+
+               for (i = 0; i < searchstr.length(); i++) {
+                       if (searchstr.charAt(i) == '<') {
+                               i++;
+                               if (searchstr.length() > (i+strTitle.length())) {
+                                       tmp = searchstr.subString(i, i+strTitle.length());
+                                       if (tmp.equalsIgnoreCase("title>")) {
+                                               mindex = i + tmp.length();
+                                               for (j = mindex; j < searchstr.length(); j++) {
+                                                       if (searchstr.charAt(j) == '<') {
+                                                               j++;
+                                                               tmp = searchstr.subString(j, j+strTitle.length()+1);                    
+                                                               if (tmp.equalsIgnoreCase("/title>")) {
+                                                                       endquote = j - 1;
+                                                                       break;
+                                                               }
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
 
-                       title = new String(searchstr.subString(mindex+sTitle.length(), endquote));
-                       
+               if (mindex != -1) {
+                       title = searchstr.subString(mindex, endquote);
                        if (Character.isWhitespace(title.charAt(0))){
                                mindex=0;
                                while (Character.isWhitespace(title.charAt(mindex++)));
                                mindex--;
+                               if (mindex >= title.length()) return null;
                                title = new String(title.subString(mindex));
                        }
 
@@ -118,23 +163,30 @@ public class QueryTask extends Task {
                                endquote=title.length()-1;
                                while (Character.isWhitespace(title.charAt(endquote--)));
                                endquote += 2;
+                               if (mindex >= endquote) return null;
                                title = new String(title.subString(0, endquote));
                        }
 
-                       if (errorPage(title)) 
-                               title = null;
+                       if (isErrorPage(title)) {
+                               return null;
+                       }
                }
+//             System.out.println("Title = [" + title + "]");
 
                return title;
        }
 
-       public static boolean errorPage(String str) {
+       public static boolean isErrorPage(String str) { 
                if (str.equals("301 Moved Permanently")) 
                        return true;
                else if (str.equals("302 Found")) 
                        return true;
                else if (str.equals("404 Not Found")) 
                        return true;
+               else if (str.equals("403 Forbidden")) 
+                       return true;
+               else if (str.equals("404 File Not Found")) 
+                       return true;
                else
                        return false;
        }
@@ -143,12 +195,32 @@ public class QueryTask extends Task {
     StringBuffer req = new StringBuffer("GET "); 
     req.append("/");
                req.append(path);
-    req.append(" HTTP/1.1\r\nHost:");
+         req.append(" HTTP/1.0\r\nHost: ");
     req.append(hostname);
     req.append("\r\n\r\n");
     sock.write(req.toString().getBytes());
   }
 
+       public static void readResponse(LocalQuery lq, Socket sock) {
+       //    state 0 - nothing
+       //    state 1 - \r
+       //    state 2 - \r\n
+       //    state 3 - \r\n\r
+       //    state 4 - \r\n\r\n
+               byte[] buffer = new byte[1024];
+               int numchars;
+
+               do {
+                       numchars = sock.read(buffer);
+
+                       String curr = (new String(buffer)).subString(0, numchars);
+                       
+                       lq.response.append(curr);
+                       buffer = new byte[1024];
+               } while(numchars > 0);
+  }
+
+/*
        public static void readResponse(LocalQuery lq, Socket sock) {
        //    state 0 - nothing
        //    state 1 - \r
@@ -202,12 +274,13 @@ public class QueryTask extends Task {
           return;
         else {
           String curr=(new String(buffer)).subString(0,numchars);
+//                                     System.out.println("numchars = "+numchars);
                                        lq.response.append(curr);
         }
       }
     }
   }
-
+*/
        public void processList() {
                LinkedList ll;
                GlobalString token = null;
@@ -236,7 +309,7 @@ public class QueryTask extends Task {
                        }
                        q.push(workingURL);     
                        results.put(token, q);
-                       System.out.println("Key : ["+token.toLocalString()+"],["+q.size()+"]");
+//                     System.out.println("Key : ["+token.toLocalString()+"],["+q.size()+"]");
                }
        }
 
@@ -251,9 +324,11 @@ public class QueryTask extends Task {
                else if (str.equals("or")) return true;
                else if (str.equals("but")) return true;
                else if (str.equals("to")) return true;
+               else if (str.equals("The")) return true;
                else if (str.equals(".")) return true;
-               else if (str.equals("=")) return true;
                else if (str.equals("-")) return true;
+               else if (str.equals("=")) return true;
+               else if (str.equals("_")) return true;
                else if (str.equals(":")) return true;
                else if (str.equals(";")) return true;
                else if (str.equals("\'")) return true;
@@ -261,6 +336,7 @@ public class QueryTask extends Task {
                else if (str.equals("|")) return true;
                else if (str.equals("@")) return true;
                else if (str.equals("&")) return true;
+               else if (str.equals(" ")) return true;
                else return false;
        }
 
@@ -274,6 +350,9 @@ public class QueryTask extends Task {
                if (str.charAt(0) == '&') {             // &
                        return str.subString(1);
                }
+               else if (str.charAt(0) == '/') {                // /
+                       return str.subString(1);
+               }
                return str;
        }
 
@@ -294,6 +373,11 @@ public class QueryTask extends Task {
                        if (str.charAt(str.length()-2) == '\'')
                                return str.subString(0, str.length()-2);        
                }
+               else if (str.charAt(str.length()-1) == '-') {
+                       int index = str.length()-2;
+                       while (Character.isWhitespace(str.charAt(index--)));
+                       return str.subString(0, index+2);
+               }
                return str;
        }
        
index 356d6fa873d66ae98250793e9dcc221ec010a17a..54aea28888f3ffc567b8249dcb590f46642827a6 100644 (file)
@@ -19,9 +19,9 @@ public class Spider {
 //             mid[0] = (128<<24)|(195<<16)|(180<<8)|21;        
 //             mid[1] = (128<<24)|(195<<16)|(180<<8)|24;        
 //             mid[2] = (128<<24)|(195<<16)|(180<<8)|26;        
-               mid[0] = (128<<24)|(195<<16)|(136<<8)|162;       
-               mid[1] = (128<<24)|(195<<16)|(136<<8)|163;       
-               mid[2] = (128<<24)|(195<<16)|(136<<8)|164;       
+               mid[0] = (128<<24)|(195<<16)|(136<<8)|162;
+               mid[1] = (128<<24)|(195<<16)|(136<<8)|163;
+               mid[2] = (128<<24)|(195<<16)|(136<<8)|164;
 
                atomic {
                        firstmachine = global new GlobalString(args[1]);
index fee8efff96adf79b1fef65cdd9b42180da14becf..8637cd83b0c6fe7cef31e9b9ab5c3657a0f70bc3 100644 (file)
@@ -13,7 +13,10 @@ public class InetAddress {
 
   public static InetAddress getByName(String hostname) {
     InetAddress[] addresses=getAllByName(hostname);
-    return addresses[0];
+               if (addresses != null) 
+           return addresses[0];
+               else 
+                       return null;
   }
 
   public byte[] getAddress() {
@@ -40,12 +43,16 @@ public class InetAddress {
 
     byte[][] iplist = InetAddress.getHostByName(hostname.getBytes());
 
-    addresses = new InetAddress[iplist.length];
+               if (iplist != null) {
+                       addresses = new InetAddress[iplist.length];
 
-    for (int i = 0; i < iplist.length; i++) {
-      addresses[i] = new InetAddress(iplist[i], hostname);
-    }
-    return addresses;
+                       for (int i = 0; i < iplist.length; i++) {
+                               addresses[i] = new InetAddress(iplist[i], hostname);
+                       }
+                       return addresses;
+               }
+               else 
+                       return null;
   }
 
   public static native byte[][] getHostByName(byte[] hostname);
index 297fe3d066532a2d964bdf23b37a5cc0c738f7d8..5863a518256c9c898522c12f789215e5705fbb00 100644 (file)
@@ -32,6 +32,18 @@ public class Socket {
     sout=new SocketOutputStream(this);
   }
 
+       public int connect(String host, int port) {
+    InetAddress address=InetAddress.getByName(host);
+               if (address != null) {
+                       fd=nativeBind(address.getAddress(), port);
+                       nativeConnect(fd, address.getAddress(), port);
+                       return 0;
+               }
+               else {
+                       return -1;
+               }
+       }
+
   public static native int nativeBind(byte[] address, int port);
 
   public static native int nativeConnect(int fd, byte[] address, int port);
index 0b85503691b80a8da8b9e0f142ce95ea5bc94cc0..7c079ac095d87e1abb93c0b2662ac1f71aba380c 100644 (file)
@@ -153,7 +153,8 @@ struct ArrayObject * CALL01(___InetAddress______getHostByName_____AR_B, struct A
   h=gethostbyname(str);
   free(str);
 
-  for (n=0; h->h_addr_list[n]; n++) /* do nothing */ ;
+       if (h != NULL) {
+               for (n=0; h->h_addr_list[n]; n++) /* do nothing */ ;
 
 #ifdef PRECISE_GC
   arraybytearray=allocate_newarray(___params___,BYTEARRAYARRAYTYPE,n);
@@ -180,6 +181,9 @@ struct ArrayObject * CALL01(___InetAddress______getHostByName_____AR_B, struct A
 
   return arraybytearray;
 #endif
+       }
+       else
+               return NULL;
 }