// QueryThread is a Thread subclass. Only some of its fields are visible in this excerpt.
1 public class QueryThread extends Thread {
// Map of handled URLs: done() stores workingURL here and checks it for each query popped from toprocess.
5 DistributedHashMap doneList;
// Array of work items; the work loop writes slot [workMID] and checkCurrentWorkList() reads slots 0..NUM_THREADS-1.
7 GlobalQuery[] currentWorkList;
// Map from a title token to a Queue; read and written by processList().
9 DistributedHashMap results;
// URL being processed; assigned in execute() and used as the doneList key in done().
12 GlobalString workingURL;
// Constructor: copies the supplied work structures and limits into the instance fields.
// NOTE(review): no assignment using the `mid` parameter is visible in this excerpt — presumably it
// initialises workMID on a line not shown; confirm.
15 public QueryThread(Queue todoList, DistributedHashMap doneList, DistributedHashMap results,int maxDepth,int mid,int NUM_THREADS,GlobalQuery[] currentWorkList) {
16 this.todoList = todoList;
17 this.doneList = doneList;
18 this.results = results;
19 this.maxDepth = maxDepth;
20 this.currentWorkList = currentWorkList;
22 this.NUM_THREADS = NUM_THREADS;
// Body of the thread's work loop (presumably run(); the method header is not part of this excerpt).
// Announces start-up, then takes work from todoList and dispatches on the status code `chk`.
33 System.out.println("Thread " + workMID + " has started");
// Take the next query from the to-do queue; the result is null when the queue has no work.
39 myWork = (GlobalQuery)todoList.pop();
41 if(null == myWork) // no work in todolist
// With nothing to pop, ask checkCurrentWorkList() what the other slots are doing.
43 chk = checkCurrentWorkList(this);
// Record the item being worked on in this thread's slot of the work list.
46 currentWorkList[workMID] = myWork;
51 if(chk == 1) { // it has query
52 QueryThread.execute(this);
// Clear this thread's slot once the query has been handled.
56 currentWorkList[workMID] = null;
59 else if(chk == -1) { // finished all work
62 else { // wait for other thread
// Final reporting: size of the done list, then a completion message.
69 System.out.println("\n\nDoneSize = " + doneList.size());
72 System.out.println("\n\n\n I'm done");
// Inspects the per-thread work slots of `qt` and returns a status code.
// The caller treats 1 as "has query" and -1 as "finished all work"; 0 (returned below) means
// other threads are still working. Only the 0 return is visible in this excerpt.
75 public static int checkCurrentWorkList(QueryThread qt) {
84 num_threads = qt.NUM_THREADS;
// Walk every slot, one per thread.
86 for(i = 0 ; (i < num_threads); i++) {
91 s = qt.currentWorkList[i];
101 if(chk == false) // wait for other machine's work
104 return 0; // others are still working wait until they finish work
// Processes the query held by `qt`: copies host and path into local strings, records the working URL,
// sends the request over a socket to port 80, then stores the page title and the queue returned by
// processPage() on `qt`.
107 public static void execute(QueryThread qt) {
112 depth = qt.myWork.getDepth();
117 /* global variables */
120 /* local variables */
// Convert the global host name and path into local Strings.
128 hostname = new String(GlobalString.toLocalCharArray(gq.getHostName()));
129 path = new String(GlobalString.toLocalCharArray(gq.getPath()));
// Build the URL in a global buffer, starting from the host name.
131 GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
// Remember the URL on the thread; done() later uses it as the doneList key.
134 qt.workingURL = global new GlobalString(gsb.toGlobalString());
137 lq = new LocalQuery(hostname, path, depth);
// Progress output: depth, host name and path of the query being processed.
139 System.printString("["+lq.getDepth()+"] ");
140 System.printString("Processing - Hostname : ");
141 System.printString(hostname);
142 System.printString(", Path : ");
143 System.printString(path);
144 System.printString("\n");
// NOTE(review): no close of this socket is visible in this excerpt — confirm it is released.
146 Socket s = new Socket(hostname, 80);
148 requestQuery(hostname, path, s);
// Store the title on the thread only when grabTitle() returned a non-null value.
151 if ((title = grabTitle(lq)) != null) {
153 qt.gTitle = global new GlobalString(title);
// Queue returned by processPage() for this response; done() drains it later.
158 qt.toprocess = processPage(lq);
// Marks the working URL as done, then drains `toprocess`, checking each popped query against doneList.
// What happens to a query that is not yet in doneList is not visible in this excerpt.
165 public void done(Object obj) {
// The stored value is the string "true"; the key is the URL set by execute().
169 GlobalString str = global new GlobalString("true");
171 doneList.put(workingURL, str);
173 while(!toprocess.isEmpty()) {
174 GlobalQuery q = (GlobalQuery)toprocess.pop();
176 GlobalString hostname = global new GlobalString(q.getHostName());
177 GlobalString path = global new GlobalString(q.getPath());
// Build the lookup key in a buffer, starting from the host name.
179 GlobalStringBuffer gsb = global new GlobalStringBuffer(hostname);
// Only keys that are not already in doneList enter this branch.
183 if (!doneList.containsKey(gsb.toGlobalString())) {
// Extracts the text between "<title>" and "</title>" from the response held by `lq`, removes
// leading and trailing whitespace, and checks the result with errorPage().
// The return statements are not visible in this excerpt; execute() treats a null result as "no title".
189 public static String grabTitle(LocalQuery lq) {
190 String sTitle = new String("<title>");
191 String eTitle = new String("</title>");
192 String searchstr = lq.response.toString();
// Locate the opening tag, then the closing tag after it.
// NOTE(review): handling of a -1 (not found) result is not visible in this excerpt — confirm.
196 int mindex = searchstr.indexOf(sTitle);
198 int endquote = searchstr.indexOf(eTitle, mindex+sTitle.length());
200 title = new String(searchstr.subString(mindex+sTitle.length(), endquote));
// Leading whitespace: advance mindex past it and cut the title there.
// NOTE(review): charAt(0) assumes a non-empty title — confirm against the lines not shown.
202 if (Character.isWhitespace(title.charAt(0))){
// NOTE(review): the post-increment leaves mindex one past the first non-whitespace character;
// verify that a line not shown here resets mindex beforehand and compensates afterwards.
204 while (Character.isWhitespace(title.charAt(mindex++)));
206 title = new String(title.subString(mindex));
// Trailing whitespace: walk endquote back from the last character and cut the title there.
209 if (Character.isWhitespace(title.charAt(title.length()-1))) {
210 endquote=title.length()-1;
// NOTE(review): the post-decrement leaves endquote one before the last non-whitespace character;
// verify that a line not shown here compensates before the subString call.
211 while (Character.isWhitespace(title.charAt(endquote--)));
213 title = new String(title.subString(0, endquote));
// Titles recognised by errorPage() take this branch; its body is not visible here.
216 if (errorPage(title))
// Compares a page title against three fixed HTTP status titles (301, 302, 404).
// The value returned in each branch is not visible in this excerpt — presumably true on a match; confirm.
223 public static boolean errorPage(String str) {
224 if (str.equals("301 Moved Permanently"))
226 else if (str.equals("302 Found"))
228 else if (str.equals("404 Not Found"))
// Builds an HTTP/1.1 GET request with a Host header and writes its bytes to `sock`.
// The append between "GET " and " HTTP/1.1" is not visible in this excerpt — presumably the path; confirm.
234 public static void requestQuery(String hostname, String path, Socket sock) {
235 StringBuffer req = new StringBuffer("GET ");
238 req.append(" HTTP/1.1\r\nHost:");
239 req.append(hostname);
// A blank line terminates the request headers.
240 req.append("\r\n\r\n");
241 sock.write(req.toString().getBytes());
// Reads the response from `sock` using a numeric `state` variable and appends text to lq.response.
// In the visible branches for states 1-3 (and one earlier branch) a single byte is read per step;
// a later branch reads up to 1024 bytes at a time. The state transitions are not visible here.
244 public static void readResponse(LocalQuery lq, Socket sock) {
249 // state 4 - \r\n\r\n
// First single-byte read; presumably the state 0 branch — its condition is not visible in this excerpt.
254 byte[] b=new byte[1];
255 int numchars=sock.read(b);
262 } else if (state==1) {
263 byte[] b=new byte[1];
264 int numchars=sock.read(b);
271 } else if (state==2) {
272 byte[] b=new byte[1];
273 int numchars=sock.read(b);
280 } else if (state==3) {
281 byte[] b=new byte[1];
282 int numchars=sock.read(b);
// Bulk read: up to 1024 bytes per call.
291 byte[] buffer=new byte[1024];
292 int numchars=sock.read(buffer);
// Keep only the first numchars characters of the buffer before appending to the response.
296 String curr=(new String(buffer)).subString(0,numchars);
297 lq.response.append(curr);
// Splits gTitle into space-separated tokens, passes each through refine(), and stores a Queue
// for the token in `results`, printing the key and queue size.
303 public void processList() {
305 GlobalString token = null;
// Loop until indexOf reports no further space (-1).
309 while (endquote != -1) {
310 endquote = gTitle.indexOf(' ', mindex);
312 if (endquote != -1) {
// Token between the current position and the next space; continue after that space.
313 token = gTitle.subString(mindex, endquote);
314 mindex = endquote + 1;
318 token = refine(token);
// No further space: the rest of the title is the last token.
321 token = gTitle.subString(mindex);
322 token = refine(token);
// Look up the queue stored for this token.
325 Queue q = (Queue)results.get(token);
// A new global Queue is created here; the guarding condition is not visible in this excerpt
// (presumably when the lookup returned null — confirm).
327 q = global new Queue();
330 results.put(token, q);
331 System.out.println("Key : ["+token.toLocalString()+"],["+q.size()+"]");
// Returns true when `str` equals one of a fixed list of short English words or single
// punctuation/symbol tokens. The result for any other input is not visible in this excerpt.
335 public boolean filter(GlobalString str) {
// Short English words.
336 if (str.equals("of")) return true;
337 else if (str.equals("for")) return true;
338 else if (str.equals("a")) return true;
339 else if (str.equals("an")) return true;
340 else if (str.equals("the")) return true;
341 else if (str.equals("at")) return true;
342 else if (str.equals("and")) return true;
343 else if (str.equals("or")) return true;
344 else if (str.equals("but")) return true;
345 else if (str.equals("to")) return true;
// Single punctuation and symbol tokens.
346 else if (str.equals(".")) return true;
347 else if (str.equals("=")) return true;
348 else if (str.equals("-")) return true;
349 else if (str.equals(":")) return true;
350 else if (str.equals(";")) return true;
351 else if (str.equals("\'")) return true;
352 else if (str.equals("\"")) return true;
353 else if (str.equals("|")) return true;
354 else if (str.equals("@")) return true;
355 else if (str.equals("&")) return true;
// Cleans a token by applying refinePrefix() and then refinePostfix() to it.
// The return statement is not visible in this excerpt.
359 public GlobalString refine(GlobalString str) {
360 str = refinePrefix(str);
361 str = refinePostfix(str);
// Removes a single leading '&' from `str`. The result for other inputs is not visible in this excerpt.
// NOTE(review): charAt(0) assumes a non-empty string — confirm callers never pass an empty token.
365 public GlobalString refinePrefix(GlobalString str) {
366 if (str.charAt(0) == '&') { // &
367 return str.subString(1);
// Removes one trailing ',', ':', ';' or '!' from `str`, or a trailing "'s" (two characters).
// The result for other inputs is not visible in this excerpt.
// NOTE(review): charAt(str.length()-1) assumes a non-empty string, and the "'s" check reads
// index length()-2, which assumes at least two characters — confirm against callers.
372 public GlobalString refinePostfix(GlobalString str) {
373 if (str.charAt(str.length()-1) == ',') { // ,
374 return str.subString(0, str.length()-1);
376 else if (str.charAt(str.length()-1) == ':') { // :
377 return str.subString(0, str.length()-1);
379 else if (str.charAt(str.length()-1) == ';') { // ;
380 return str.subString(0, str.length()-1);
382 else if (str.charAt(str.length()-1) == '!') { // !
383 return str.subString(0, str.length()-1);
// Trailing 's' is removed only when the character before it is an apostrophe.
385 else if (str.charAt(str.length()-1) == 's') { // 's
386 if (str.charAt(str.length()-2) == '\'')
387 return str.subString(0, str.length()-2);
392 public static Queue processPage(LocalQuery lq) {
394 String href = new String("href=\"");
395 String searchstr = lq.response.toString();
400 depth = lq.getDepth() + 1;
402 toprocess = global new Queue();
404 int mindex = searchstr.indexOf(href,index);
406 int endquote = searchstr.indexOf('"', mindex+href.length());
407 if (endquote != -1) {
408 String match = searchstr.subString(mindex+href.length(), endquote);
409 String match2 = lq.makewebcanonical(match);
411 GlobalString ghostname;
414 ghostname = global new GlobalString(lq.getHostName(match));
415 gpath = global new GlobalString(lq.getPathName(match));
417 if (match2 != null) {
418 GlobalQuery gq = global new GlobalQuery(ghostname, gpath, depth);