+++ /dev/null
-public class Query {
- GlobalString hostname;
- GlobalString path;
- int depth;
-
- public Query(GlobalString hostname, GlobalString path, int depth) {
- this.hostname = global new GlobalString(hostname);
- this.path = global new GlobalString(path);
- this.depth = depth;
- }
-
- public int getDepth() {
- return depth;
- }
-
- public GlobalString getHostName() {
- return hostname;
- }
-
- public GlobalString getPath() {
- return path;
- }
-
- public GlobalString getHostName(GlobalString page) {
- GlobalString http = global new GlobalString("http://");
- if (page.indexOf(http) == -1) {
- return getHostName();
- } else {
- int beginindex = page.indexOf(http) + http.length();
- int endindex = page.indexOf('/',beginindex+1);
- if ((beginindex == -1)) {
- System.printString("ERROR");
- }
- if (endindex == -1)
- endindex = page.length();
- return page.subString(beginindex, endindex);
- }
- }
-
-
- public GlobalString getPathName(GlobalString page) {
- GlobalString http = global new GlobalString("http://");
- if (page.indexOf(http) == -1) {
- GlobalString path = getPath();
- int lastindex = path.lastindexOf('/');
- if (lastindex == -1)
- return page;
-
- GlobalStringBuffer sb = global new GlobalStringBuffer(path.subString(0,lastindex+1));
- sb.append(page);
- return sb.toGlobalString();
- } else {
- int beginindex = page.indexOf(http)+http.length();
- int nextindex = page.indexOf('/',beginindex+1);
- if ((beginindex == -1) || (nextindex == -1))
- return global new GlobalString("index.html");
- return page.subString(nextindex+1, page.length());
- }
- }
-}
+++ /dev/null
-public class QueryList extends Queue {
- Queue queries;
-
- public QueryList() {
- queries = global new Queue();
- }
-
- public boolean checkQuery(GlobalString x) {
- boolean set = false;;
- for (int i = 0 ; i < size; i++) {
- if (x.equals((GlobalString)elements[i])) {
- set = true;
- break;
- }
- }
- return set;
- }
-
- public void addQuery(GlobalString x) {
- queries.push(x);
- }
-}
+++ /dev/null
-public class QueryQueue {
- HashSet queries;
- int size;
-
- public QueryQueue() {
- queries = new HashSet();
- size = 0;
- }
-
- public LocalQuery pop() {
- if (queries.isEmpty())
- return null;
- LocalQuery q = (LocalQuery) queries.iterator().next();
- queries.remove(q);
- size--;
- return q;
- }
-
- public void push(LocalQuery x) {
- queries.add(x);
- size++;
- }
-
- public int size() {
- return size;
- }
-
- public boolean isEmpty() {
- if (size == 0)
- return true;
- else
- return false;
- }
-}
int maxDepth;
Queue toprocess;
DistributedHashMap results;
+ GlobalString gTitle;
GlobalString workingURL;
public QueryTask(Queue todoList, DistributedHashMap doneList, int maxDepth, DistributedHashMap results) {
LocalQuery lq;
String hostname;
String path;
+ String title;
atomic {
gq = (GlobalQuery)myWork;
gsb.append("/");
gsb.append(path);
workingURL = global new GlobalString(gsb.toGlobalString());
+ gTitle = null;
}
lq = new LocalQuery(hostname, path, depth);
- System.printString(lq.getDepth()+" ");
+ System.printString("["+lq.getDepth()+"] ");
System.printString("Processing - Hostname : ");
System.printString(hostname);
System.printString(", Path : ");
requestQuery(hostname, path, s);
readResponse(lq, s);
- atomic {
- processList(lq, workingURL, results);
+ if ((title = grabTitle(lq)) != null) {
+ atomic {
+ gTitle = global new GlobalString(title);
+ }
}
atomic {
}
public void done(Object obj) {
+ if (gTitle != null)
+ processList();
+
GlobalString str = global new GlobalString("true");
+
doneList.put(workingURL, str);
while(!toprocess.isEmpty()) {
}
}
+ /**
+  * Extracts the text between the first "<title>" tag and the next
+  * "</title>" tag found in lq.response.
+  *
+  * @param lq query whose response field holds the page text to search
+  * @return the title text, or null when the page has no "<title>" tag or
+  *         the tag is never closed
+  */
+ public static String grabTitle(LocalQuery lq) {
+     String sTitle = new String("<title>");
+     String eTitle = new String("</title>");
+     String searchstr = lq.response.toString();
+     String title = null;
+
+     int mindex = searchstr.indexOf(sTitle);
+     if (mindex != -1) {
+         int begin = mindex + sTitle.length();
+         int endquote = searchstr.indexOf(eTitle, begin);
+         // A page can open <title> without ever closing it; report that as
+         // "no title" instead of handing -1 to subString as the end index.
+         if (endquote != -1) {
+             title = new String(searchstr.subString(begin, endquote));
+         }
+     }
+
+     return title;
+ }
+
public static void requestQuery(String hostname, String path, Socket sock) {
StringBuffer req = new StringBuffer("GET ");
req.append("/");
}
}
- public static void processList(LocalQuery lq, GlobalString url, DistributedHashMap results) {
- String sTitle = new String("<title>");
- String eTitle = new String("</title>");
- String searchstr = lq.response.toString();
+ public void processList() { // Walks gTitle one space-separated token at a time, passing each token through censor() and refinement().
 LinkedList ll;
+ GlobalString token = null;
+ int mindex = 0; // start index of the token currently being cut out of gTitle
+ int endquote = 0; // index of the space that ends the current token; -1 stops the loop
- int sIndex = searchstr.indexOf(sTitle);
- if (sIndex != -1) {
- int eIndex = searchstr.indexOf(eTitle, sIndex+sTitle.length());
- String title = new String(searchstr.subString(sIndex+sTitle.length(), eIndex));
- ll = tokenize(title);
-
- Queue q;
- while (!ll.isEmpty()) {
- GlobalString word = global new GlobalString(ll.pop().toString());
-// q = (Queue)(results.get(word));
+ while (endquote != -1) {
+ endquote = gTitle.indexOf(' ', mindex); // presumably -1 when no space remains, as with String.indexOf - confirm for GlobalString
-// if (q == null) {
- if (!results.containsKey(word)) {
- q = global new Queue();
- }
- else {
- q = (Queue)(results.get(word));
+ if (endquote != -1) {
+ token = gTitle.subString(mindex, endquote);
+ mindex = endquote + 1; // the next token starts just past the space
+ if (censor(token)) { // tokens rejected by censor() are skipped entirely
+ continue;
 }
- q.push(url);
- results.put(word, q);
+ token = refinement(token);
+ }
+ else {
+ token = gTitle.subString(mindex); // no further space: the remainder of the title is the last token (it is not passed through censor())
+ token = refinement(token);
+ } // NOTE(review): in the patched file everything inside the block comment that follows is disabled, so the token computed above is never stored in results - confirm this is intentional
- System.out.println("Key : ["+word.toLocalString()+"],["+q.size()+"]");
 /*
- for (int i = 0; i < q.size(); i++) {
- Object obj = q.elements[i];
- GlobalString str = global new GlobalString((GlobalString)obj);
- System.out.println("\t["+i+"] : "+str.toLocalString());
- }*/
+ Queue q;
+ if ((q = (Queue)(results.remove(token))) == null) {
+ q = global new Queue();
 }
+ else {
+ q = (Queue)(results.get(token));
+ }
+ // bug here <- object id changed?? NOTE(review): results.remove(token) above has presumably already dropped the entry, so the results.get(token) in the else branch may return null - confirm
+ q.push(workingURL);
+ results.put(token, q);
+
+ System.out.println("Key : ["+token.toLocalString()+"],["+q.size()+"]");
+ */
 }
 }
- public static LinkedList tokenize(String str) {
- LinkedList ll;
- int sIndex = 0;
- int eIndex = 0;
- String token;
+ /**
+  * Decides whether a title token should be skipped by the caller.
+  *
+  * NOTE(review): str is a GlobalString while every comparison value is a
+  * String literal - confirm that GlobalString.equals accepts a String.
+  *
+  * @param str token to test
+  * @return true when str.equals(...) succeeds for one of the words or
+  *         punctuation strings listed below, false otherwise
+  */
+ public boolean censor(GlobalString str) {
+     // Short function words.
+     boolean isFilteredWord =
+         str.equals("of") || str.equals("for") || str.equals("a")
+         || str.equals("an") || str.equals("the") || str.equals("at")
+         || str.equals("and") || str.equals("or") || str.equals("but");
+     if (isFilteredWord) {
+         return true;
+     }
+
+     // Tokens that are a single punctuation character.
+     return str.equals(".") || str.equals("=") || str.equals("-")
+         || str.equals(":") || str.equals(";") || str.equals("\'")
+         || str.equals("\"") || str.equals("@");
+ }
- ll = new LinkedList();
-
- // and, or, of, at, but, '.', ',', ':' ';', '"', ' ', '-', '='
- while (true) {
- eIndex = str.indexOf(' ', sIndex);
- if (eIndex == -1) {
- token = str.subString(sIndex);
- ll.add(token);
- break;
- }
- else {
- token = str.subString(sIndex, eIndex);
- ll.add(token);
- sIndex = eIndex+1;
- }
+ /*
+ * Trims trailing punctuation from a title token.
+ * A final ',' or ':' is removed, and a final "'s" is removed as a pair.
+ * Any other token, including the empty token, is returned unchanged.
+ */
+ public GlobalString refinement(GlobalString str) {
+ int len = str.length();
+ // An empty token has no last character to inspect, so leave it alone
+ // rather than calling charAt(-1).
+ if (len == 0) {
+ return str;
+ }
+ char last = str.charAt(len-1);
+ if (last == ',' || last == ':') {
+ return str.subString(0, len-1);
 }
-
- return ll;
+ else if (last == 's' && len >= 2 && str.charAt(len-2) == '\'') {
+ // The len >= 2 check keeps a lone "s" from reading index -1.
+ return str.subString(0, len-2);
+ }
+ return str;
 }
public static Queue processPage(LocalQuery lq) {
depth = lq.getDepth() + 1;
toprocess = global new Queue();
-
while(cont) {
int mindex = searchstr.indexOf(href,index);
if (mindex != -1) {
+++ /dev/null
-public class QueryThread extends Task {
- int maxDepth;
- int maxSearchDepth;
-
- public QueryThread(Queue todoList, Queue doneList, int maxDepth, int maxSearchDepth) {
- this.todoList = todoList;
- this.doneList = doneList;
- this.maxDepth = maxDepth;
- this.maxSearchDepth = maxSearchDepth;
- }
-
- public void execute() {
- int depth;
- int max;
- int maxSearch;
-
- atomic {
- depth = ((Query)myWork).getDepth();
- max = this.maxDepth;
- maxSearch = this.maxSearchDepth;
- }
-
- if (depth < max) {
- /* global variables */
- Query q;
- GlobalString ghostname;
- GlobalString gpath;
-
- /* local variables */
- QueryQueue toprocess;
- LocalQuery lq;
- String hostname;
- String path;
-
- atomic {
- q = (Query)myWork;
- ghostname = q.getHostName();
- gpath = q.getPath();
- hostname = new String(GlobalString.toLocalCharArray(ghostname));
- path = new String(GlobalString.toLocalCharArray(gpath));
- }
- lq = new LocalQuery(hostname, path, depth);
-
- System.printString("Processing - Hostname : ");
- System.printString(hostname);
- System.printString(", Path : ");
- System.printString(path);
- System.printString("\n");
-
- Socket s = new Socket(hostname, 80);
-
- requestQuery(hostname, path, s);
- readResponse(lq, s);
- toprocess = processPage(lq,maxSearch);
- s.close();
-
- atomic {
- while(!toprocess.isEmpty()) {
- lq = toprocess.pop();
- ghostname = global new GlobalString(lq.getHostName());
- gpath = global new GlobalString(lq.getPath());
-
- q = global new Query(ghostname, gpath, lq.getDepth());
- todoList.push(q);
- }
- }
- }
- }
-
- public static void requestQuery(String hostname, String path, Socket sock) {
- StringBuffer req = new StringBuffer("GET ");
- req.append("/");
- req.append(path);
- req.append(" HTTP/1.1\r\nHost:");
- req.append(hostname);
- req.append("\r\n\r\n");
- sock.write(req.toString().getBytes());
- }
-
- public static void readResponse(LocalQuery lq, Socket sock) {
- // state 0 - nothing
- // state 1 - \r
- // state 2 - \r\n
- // state 3 - \r\n\r
- // state 4 - \r\n\r\n
- int state=0;
- while(true) {
- if (state<4) {
- if (state==0) {
- byte[] b=new byte[1];
- int numchars=sock.read(b);
- if ((numchars==1)) {
- if (b[0]=='\r') {
- state++;
- }
- } else
- return;
- } else if (state==1) {
- byte[] b=new byte[1];
- int numchars=sock.read(b);
- if (numchars==1) {
- if (b[0]=='\n')
- state++;
- else
- state=0;
- } else return;
- } else if (state==2) {
- byte[] b=new byte[1];
- int numchars=sock.read(b);
- if (numchars==1) {
- if (b[0]=='\r')
- state++;
- else
- state=0;
- } else return;
- } else if (state==3) {
- byte[] b=new byte[1];
- int numchars=sock.read(b);
- if (numchars==1) {
- if (b[0]=='\n')
- state++;
- else
- state=0;
- } else return;
- }
- } else {
- byte[] buffer=new byte[1024];
- int numchars=sock.read(buffer);
- if (numchars==0)
- return;
- else {
- String curr=(new String(buffer)).subString(0,numchars);
- lq.response.append(curr);
- }
- }
- }
- }
-
- public void done(Object obj) {
- doneList.push(obj);
- }
-
- public static QueryQueue processPage(LocalQuery lq,int maxSearchDepth) {
- int index = 0;
- String href = new String("href=\"");
- String searchstr = lq.response.toString();
- int depth;
- boolean cont = true;
-
- QueryQueue toprocess = new QueryQueue();
- depth = lq.getDepth() + 1;
-
- int searchDepthCnt = 0;
- while(cont && (searchDepthCnt < maxSearchDepth)) {
- int mindex = searchstr.indexOf(href,index);
- if (mindex != -1) {
- int endquote = searchstr.indexOf('"', mindex+href.length());
- if (endquote != -1) {
- String match = searchstr.subString(mindex+href.length(), endquote);
- String match2 = lq.makewebcanonical(match);
-
- if (match2 != null) {
- LocalQuery newlq = new LocalQuery(lq.getHostName(match), lq.getPathName(match), depth);
-
- toprocess.push(newlq);
- searchDepthCnt++;
- }
- index = endquote;
- } else cont = false;
- } else cont = false;
- }
-
- return toprocess;
- }
-}
GlobalString firstmachine;
int mid[] = new int[NUM_THREADS];
- mid[0] = (128<<24)|(195<<16)|(180<<8)|21;
- mid[1] = (128<<24)|(195<<16)|(180<<8)|24;
- mid[2] = (128<<24)|(195<<16)|(180<<8)|26;
+// mid[0] = (128<<24)|(195<<16)|(180<<8)|21;
+// mid[1] = (128<<24)|(195<<16)|(180<<8)|24;
+// mid[2] = (128<<24)|(195<<16)|(180<<8)|26;
+ mid[0] = (128<<24)|(195<<16)|(136<<8)|162;
+ mid[1] = (128<<24)|(195<<16)|(136<<8)|163;
+ mid[2] = (128<<24)|(195<<16)|(136<<8)|164;
atomic {
firstmachine = global new GlobalString(args[1]);
-128.195.180.21
-128.195.180.24
-128.195.180.26
+#128.195.180.21
+#128.195.180.24
+#128.195.180.26
+128.195.136.162
+128.195.136.163
+128.195.136.164
SUBCLASS=Query
SRC1=${MAINCLASS}.java
SRC2=Global${SUBCLASS}.java
-SRC3=${SUBCLASS}Queue.java
-SRC4=${SUBCLASS}Task.java
+SRC3=${SUBCLASS}Task.java
FLAGS= -recovery -dsmtask -dsm -dsmtask -32bit -nooptimize -debug -mainclass ${MAINCLASS}
default:
- ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC4} ${SRC1}
+ ../../../buildscript ${FLAGS} -o ${MAINCLASS} ${SRC2} ${SRC3} ${SRC1}
clean:
rm -rf tmpbuilddirectory