more changes to check emails
author adash <adash>
Sat, 31 Oct 2009 02:19:18 +0000 (02:19 +0000)
committer adash <adash>
Sat, 31 Oct 2009 02:19:18 +0000 (02:19 +0000)
Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java
Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java
Robust/src/Benchmarks/Distributed/SpamFilter/SignatureComputer.java
Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java

index 31d60aad329ae5f3633d19026185eb5ac7cd3fd5..c3014f5882a4999ef64d2156466e5187e4e82ad8 100644 (file)
@@ -14,10 +14,23 @@ public class HashEntry {
     // this will not work well if some of the strings are equal.
     result = engine.hashCode();
     result ^= signature.hashCode();
-    result ^= stats.hashCode();
+    //result ^= stats.hashCode();
+    System.out.println("result= " + result);
     return result;
   }
 
+  public void setengine(String engine) {
+    this.engine=engine;
+  }
+
+  public void setstats(HashStat stats) {
+    this.stats=stats;
+  }
+
+  public void setsig(String signature) {
+    this.setsig=signature;
+  }
+
   public String getEngine() {
     return engine;
   }
@@ -38,8 +51,8 @@ public class HashEntry {
       return false;
     if(!(he.getSignature().equals(Signature)))
       return false;
-    if(!(he.getStats().equals(stats)))
-      return false;
+    //if(!(he.getStats().equals(stats)))
+    //  return false;
     return true;
   }
 }
index 68b3ebf56115e247f0518a4dbd50a9a4e431c16d..e475087aac1c63e4f9e2c6fcab3de4fdea55d199 100644 (file)
@@ -9,6 +9,12 @@ public class HashStat {
     }
   }
 
+  public void setuser(int id, int spam, int ham, int unknown) {
+    userstat[id].setSpam(spam);
+    userstat[id].setHam(ham);
+    userstat[id].setUnknown(unknown);
+  }
+
   public int getuser(int id) {
     return userid[id];
   }
index 0761117286b05acc493df77578ee8ac08e9f072c..e0ffd26ae66c9f3dd4e35ae4b5b9d1be8efcfa01 100644 (file)
@@ -70,12 +70,12 @@ public class SignatureComputer {
                return "4";
        }
 
-       public Vector computeSigs(StringBuffer[] Mails) {
-               if (Mails == null) return null;
+       public Vector computeSigs(StringBuffer[] EmailParts) {
+               if (EmailParts == null) return null;
 
-               Vector printableSigs = new Vector();
-               for (int mailIndex = 0; mailIndex < Mails.length; mailIndex++) {
-                       StringBuffer mail = Mails[mailIndex];
+               Vector printableSigs = new Vector(); // vector of strings
+               for (int mailIndex = 0; mailIndex < EmailParts.length; mailIndex++) {
+                       StringBuffer mail = EmailParts[mailIndex];
 
                        if (mail == null) continue;
 
@@ -84,36 +84,29 @@ public class SignatureComputer {
              */
             for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) {
               int engineNo = enginesToUseForCheck[engineIndex];
-              String[] sig = null;
+              String sig = null;
 
               switch (engineNo) {
                 case 4:
-                  sig = computeSignature(engineNo,curPart.getCleaned());
+                  sig = computeSignature(engineNo,mail.toString());
                   break;
                 case 8:
-                  sig = computeSignature(engineNo,curPart.getBody());
+                  sig = computeSignature(engineNo,mail.toString());
                   break;
                 default:
-                  /*
-                   * for nilsimsa and sha1 wich are no longer supported by
-                   * the server and might be removed someday
-                   */
-                  sig = computeSignature(engineNo,curPart.getCleaned());
+                  System.out.println("Couldn't find the signature engine\n");
+                  //sig = computeSignature(engineNo,curPart.getCleaned());
                   break;
               }//switch engineNo
 
               if (sig != null && sig.length > 0) {
-                for (int curSigIndex = 0; curSigIndex < sig.length; curSigIndex++) {
-                  String hash = engineNo + ":" + sig[curSigIndex];
-                  curPart.addHash(hash);
-                  printableSigs.add(hash);
-                }
-
+                String hash = engineNo + ":" + sig[curSigIndex];
+                printableSigs.add(hash);
               } else {
                 /* we didn't produce a signature for the mail. */
               }
             }//engine
-        }//mails
+        }//each email part
         return printableSigs;
     }//computeSigs
 
@@ -127,32 +120,11 @@ public class SignatureComputer {
                        case 4:
                                return new String[] { this.sig4.computeSignature(mail) };
                        case 8:
-                               String cleanedButKeepHTML = Preprocessor.preprocess(mail,Preprocessor.ConfigParams.NO_DEHTML);
+                //TODO devise an equivalent for this
+                               //String cleanedButKeepHTML = Preprocessor.preprocess(mail,Preprocessor.ConfigParams.NO_DEHTML);
                                return this.sig8.computeSignature(cleanedButKeepHTML);
                        default:
                                return null;
                }
        }
-
-       public static String[] getCommonSupportedEngines(int serverSupportedEngines) {
-               Vector<String> commonSupported = new Vector<String>();
-               int engineMask = 1;
-               int engineIndex = 1;
-               while (engineIndex < 32) {
-                       boolean serverSupported = (serverSupportedEngines & engineMask) > 0;
-                       boolean clientSupported = isSigSupported(engineIndex);
-                       if (serverSupported && clientSupported) {
-                               commonSupported.add(String.valueOf(engineIndex));
-                       }
-                       //switch to next
-                       engineMask <<= 1; //shift one to left
-                       engineIndex++;
-               }
-               if (commonSupported.size() == 0) {
-                       return null;
-               }
-               String[] result = new String[commonSupported.size()];
-               commonSupported.toArray(result);
-               return result;
-       }
 }
index 7a9b62b4b7d11121d73cb2b1d7962bef77a0c9dd..e134e0423807ca88a4d8af9edf12bc6b7864c1c6 100644 (file)
@@ -12,7 +12,7 @@ public class SpamFilter extends Thread {
 
   }
 
-  public SpamFilter(int numiter, int numemail,int threadid) {
+  public SpamFilter(int numiter, int numemail,int id) {
     this.numiter=numiter;
     this.numemail=numemail;
     this.id = id;
@@ -20,10 +20,12 @@ public class SpamFilter extends Thread {
 
   public void run() {
     int niter;
-    int nemails
+    int nemails;
+    int thid;
     atomic {
       niter=numiter;
       nemails=numemails;
+      thid = id;
     }
 
     Random rand = new Random(0);
@@ -32,7 +34,7 @@ public class SpamFilter extends Thread {
       for(int j=0; j<nemails; j++) {
         int pickemail = rand.nextInt(100);
         //String email = getEmail(pickemail);
-        //checkMails(email);
+        checkMail(email, thid);
       }
     }
   }
@@ -108,20 +110,108 @@ public class SpamFilter extends Thread {
   /**
    *  Returns signatures to the Spam filter
    **/
-  public FilterResult[] checkMail(Mail mail) {
+  public FilterResult[] checkMail(Mail mail, int userid) {
     //Preprocess emails
-      //StringBuffer[] partsOfMailStrings = createMailStrings();
+      //String[] partsOfMailStrings = createMailStrings();
       //RazorMail[] razorMails = 
     //Compute signatures
     SignatureComputer sigComp = new SignatureComputer();
     Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings
           
     //check with global data structure
-    check(signatures);
+    check(signatures, userid);
 
     //---- create and  return results --------
     FilterResult[] filterResults = new FilterResult[mailStrings.length];
 
     return filterResults;
   } 
+
+  public void check(Vector emailParts, int userid) {
+    for(int i=0; i<emailParts.size(); i++) {
+      String part = (String)(emailParts.elementAt(i));
+      char tmpengine = part.charAt(0);
+      String engine =  new String(tmpengine);
+      String signature = part.substring(2); //a:b index(a)=0, index(:)=1, index(b)=2
+      HashEntry myhe = new HashEntry();
+      myhe.setengine(engine);
+      myhe.setsig(signature);
+      //find object in distributedhashMap: if no object then add object 
+      //else read object
+      HashEntry tmphe;
+      if((tmphe=(HashEntry)mydhmap.get(myhe))== null) {
+        //add new object
+        myhe.stats = new HashStat();
+        myhe.stats.setuser(userid, 0, 0, 1);
+      } else {
+        //else if read object
+        Vector<String> enginesToSend = new Vector<String>();
+        Vector<String> sigsToSend = new Vector<String>();
+
+        for (RazorMail mail : razorMails) {
+          for (int partNr = 0; partNr < mail.getPartSize(); partNr++) {
+            Part part = mail.getPart(partNr);
+            if (part.skipMe()) {
+              continue;
+            }
+
+            for (Iterator<String> hashIter = part.getHashIterator(); hashIter.hasNext();) {
+              String curHash = (String)hashIter.next();
+              String[] engineHashSplit = curHash.split(":");
+              String engine = engineHashSplit[0];
+              String signature = engineHashSplit[1];
+              enginesToSend.add(engine);
+              sigsToSend.add(signature);
+            }
+          }
+        }
+
+        if (sigsToSend.size() == 0) { // nothing to send
+          return;
+        }
+
+        String[] enginesToSendArr = new String[enginesToSend.size()];
+        enginesToSend.toArray(enginesToSendArr);
+        String[] sigsToSendArr = new String[sigsToSend.size()];
+        sigsToSend.toArray(sigsToSendArr);
+
+        // ----- now connect to server and ask query -----
+        int[] confidenceVals = null;
+        RazorCommunicationEngine checkEngine = getCheckEngine();
+        try {
+          checkEngine.connect();
+          confidenceVals = checkEngine.askForSpam(sigsToSendArr,enginesToSendArr);
+          checkEngine.disconnect();
+        } finally {
+          checkEngines.add(checkEngine);
+        }
+
+        if (confidenceVals == null) {
+          System.err.println("check got no answer from server. error.");
+          return; // error
+        }
+
+        if (confidenceVals.length != sigsToSendArr.length) {
+          throw new IllegalStateException("We got not enough answers from server. expected: " + sigsToSendArr.length + "  received: " + confidenceVals.length);
+        }
+
+        // ----- now dispatch the answers to the mail objects -----
+        int answerIndex = 0;
+        for (RazorMail mail : razorMails) {
+          for (int partNr = 0; partNr < mail.getPartSize(); partNr++) {
+            Part part = mail.getPart(partNr);
+            if (part.skipMe()) {
+              continue;
+            }
+
+            for (Iterator<String> hashIter = part.getHashIterator(); hashIter.hasNext();) {
+              String curHash = hashIter.next();
+              part.setResponse(curHash,String.valueOf(confidenceVals[answerIndex++]));
+            }
+          }
+        }
+        //  --> after this loop the mail is able to determine if it is spam or not
+      }
+    }
+  }
 }