more changes
authoradash <adash>
Sun, 1 Nov 2009 02:28:41 +0000 (02:28 +0000)
committeradash <adash>
Sun, 1 Nov 2009 02:28:41 +0000 (02:28 +0000)
Robust/src/Benchmarks/Distributed/SpamFilter/FilterResult.java
Robust/src/Benchmarks/Distributed/SpamFilter/FilterStatistic.java
Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java
Robust/src/Benchmarks/Distributed/SpamFilter/HashStat.java
Robust/src/Benchmarks/Distributed/SpamFilter/Mail.java
Robust/src/Benchmarks/Distributed/SpamFilter/SignatureComputer.java
Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java

index 25a98ac48d2e83db533081bb285b71ecdc32e6d2..9eb03c52a310586ce9a70982ea0a67946b896bab 100644 (file)
@@ -2,53 +2,87 @@
  * A FilterResult encapsulates the result of a filter made by checking a mail.
  **/
 public class FilterResult {
-       /**
-        * This value is used if type is ERROR or UNKNOWN.
-        */
-       public double NO_RESULT;
-
-       /**
-        * A result value greater or equal this value indicates that the filter has
-        * decided on spam.
-        */
-       public double SPAM_THRESHOLD;
-       public double ABSOLUTE_SPAM;
-       public double ABSOLUTE_HAM;
-
-    //TODO decide a good way of deciding
-       public double result; // the result, a value between 0 (ham) and 1 (spam), negative values for "error", "unknown" etc.
-
-       //public HashMap<String,String> properties = new HashMap<String,String>(); // additional properties of the filter (mainly for statistics)
-
-       // -----------------------------------------------------------------------------
-
-       public FilterResult(double result) {
-      SPAM_THRESHOLD=0.5;
-      ABSOLUTE_SPAM=1.0;
-      ABSOLUTE_HAM=0.0;
-      NO_RESULT=-1;
-      this.result = result;
-    }
+  /**
+   * This value is used if type is ERROR or UNKNOWN.
+   */
+  public double NO_RESULT;
+
+  /**
+   * A result value greater or equal this value indicates that the filter has
+   * decided on spam.
+   */
+  public int SPAM_THRESHOLD;
+  public int ABSOLUTE_SPAM;
+  public int ABSOLUTE_HAM;
+
+  public double result; // the result, a value between 0 (ham) and 1000 (spam),
+  // negative values for "error", "unknown" etc.
+
+  // -----------------------------------------------------------------------------
+
+  public FilterResult(double result) {
+    SPAM_THRESHOLD=500;
+    ABSOLUTE_SPAM=1000;
+    ABSOLUTE_HAM=0;
+    NO_RESULT=-1;
+    this.result = result;
+  }
 
-       public double getResult() {
-               return result;
-       }
+  public FilterResult() {
+    SPAM_THRESHOLD=500;
+    ABSOLUTE_SPAM=1000;
+    ABSOLUTE_HAM=0;
+    NO_RESULT=-1;
+  }
+
+  public double getResult() {
+    return result;
+  }
+
+  public boolean isSpam() {
+    return result >= SPAM_THRESHOLD;
+  }
+
+  /**
+   * Majority vote over per-signature confidence values: negative counts as
+   * unknown, below SPAM_THRESHOLD as ham, everything else as spam.
+   * @param confidenceVals confidence values, one per checked signature
+   * @return true only if spam has strictly more votes than unknown and ham
+   */
+  public boolean getResult(int[] confidenceVals) {
+    int[] res = new int[3]; // votes: [0] unknown, [1] ham, [2] spam
+    for(int i=0; i<confidenceVals.length; i++) {
+      if(confidenceVals[i] < 0)
+        res[0]+=1; //unknown
+      else if(confidenceVals[i] < SPAM_THRESHOLD)
+        res[1]+=1; //ham
+      else
+        res[2]+=1; //spam, same ">=" rule as isSpam()
+    }
+    int maxVotes=0;
+    int max=0; // ties and empty input resolve to the lowest index, never to spam
+    for(int i=0; i<3;i++) {
+      if(res[i] > maxVotes) {
+        maxVotes = res[i];
+        max = i;
+      }
+    }
 
-       public boolean isSpam() {
-               return result >= SPAM_THRESHOLD;
-       }
+    // only a spam majority (index 2) classifies the mail as spam
+    return max == 2;
+  }
 
-    /*
-       public void addProperty(String key, String value) {
-               properties.put(key,value);
-       }
+  /*
+     public void addProperty(String key, String value) {
+     properties.put(key,value);
+     }
 
-       public String getProperty(String key) {
-               return properties.get(key);
-       }
+     public String getProperty(String key) {
+     return properties.get(key);
+     }
 
-       public HashMap<String,String> getProperties() {
-               return properties;
-       }
-    */
+     public HashMap<String,String> getProperties() {
+     return properties;
+     }
+   */
 }
index 0a920c9f93345533df30d79d78d7865fbf1ab798..c50b7023e79a30fc7860c687cf0493d39f5bf1fd 100644 (file)
@@ -1,66 +1,66 @@
 public class FilterStatistic {
-       int unknown;
-       int spam;
-       int ham;
+  int unknown;
+  int spam;
+  int ham;
+  String name; // backs getName()/setName(), which referenced this field without a declaration
 
-       // -------------------------------------------------------
-       
-       public FilterStatistic() {
-      this(0,0,0);
-       }
+  // -------------------------------------------------------
 
-       public FilterStatistic(int spam, int ham, int unknown) {
-               this.spam = spam;
-               this.ham = ham;
-               this.unknown = unknown;
-       }
+  public FilterStatistic() {
+    this(0,0,0);
+  }
 
-       public int getChecked() {
-      //TODO Change this formula
-               return getSpam() + getHam() + getUnknown();
-       }
+  public FilterStatistic(int spam, int ham, int unknown) {
+    this.spam = spam;
+    this.ham = ham;
+    this.unknown = unknown;
+  }
 
-       public int getHam() {
-               return ham;
-       }
+  public int getChecked() {
+    return getSpam() + getHam() + getUnknown();
+  }
 
-       public int getSpam() {
-               return spam;
-       }
+  public int getHam() {
+    return ham;
+  }
 
-       public String getName() {
-               return name;
-       }
+  public int getSpam() {
+    return spam;
+  }
 
-       public void setHam(int i) {
-               ham = i;
-       }
+  public String getName() {
+    return name;
+  }
 
-       public void setSpam(int i) {
-               spam = i;
-       }
+  public void setHam(int i) {
+    ham = i;
+  }
 
-       public int getUnknown() {
-               return unknown;
-       }
+  public void setSpam(int i) {
+    spam = i;
+  }
 
-       public void setUnknown(int u) {
-               unknown = u;
-       }
+  public int getUnknown() {
+    return unknown;
+  }
 
-       public void increaseSpam() {
-               setSpam(getSpam() + 1);
-       }
+  public void setUnknown(int u) {
+    unknown = u;
+  }
 
-       public void increaseHam() {
-               setHam(getHam() + 1);
-       }
+  public void increaseSpam() {
+    setSpam(getSpam() + 1);
+  }
 
-       public void increaseUnknown() {
-               setUnknown(getUnknown() + 1);
-       }
+  public void increaseHam() {
+    setHam(getHam() + 1);
+  }
 
-       public void setName(String name) {
-               this.name = name;
-       }
+  public void increaseUnknown() {
+    setUnknown(getUnknown() + 1);
+  }
+
+  public void setName(String name) {
+    this.name = name;
+  }
 }
index c3014f5882a4999ef64d2156466e5187e4e82ad8..95cfe40ab966fed40d7b490dfbec378c5498705e 100644 (file)
@@ -55,4 +55,14 @@ public class HashEntry {
     //  return false;
     return true;
   }
+
+  public int askForSpam() { // sums getChecked() over every user returned by stats.getUsers(); NOTE(review): SpamFilter.check() calls askForSpam(numparts) - arity differs
+    Vector users = stats.getUsers();
+    int spamConfidence=0;
+    for(int i=0; i<users.size(); i++) {
+      int userid = (int) (users.elementAt(i)); // elements are the user indices added by HashStat.getUsers()
+      spamConfidence += stats.userstat[userid].getChecked(); // NOTE(review): getChecked() is spam+ham+unknown, not a spam-only score - confirm intent
+    }
+    return spamConfidence;
+  }
 }
index e475087aac1c63e4f9e2c6fcab3de4fdea55d199..a11004161aa2c64d4d0e6af090e90ac527844785 100644 (file)
@@ -1,6 +1,7 @@
 public class HashStat {
   int[] userid;
   FilterStatistic[] userstat; 
+  Vector listofusers;
   public HashStat() {
     userid = new int[8]; //max users for our system=8
     userstat = new FilterStatistic[8];
@@ -10,6 +11,7 @@ public class HashStat {
   }
 
   public void setuser(int id, int spam, int ham, int unknown) {
+    userid[id] = 1;
     userstat[id].setSpam(spam);
     userstat[id].setHam(ham);
     userstat[id].setUnknown(unknown);
@@ -30,4 +32,24 @@ public class HashStat {
   public int getunknowncount(int userid) {
     return userstat[userid].getUnknown();
   }
+
+  // Returns the indices of all users flagged by setuser(), rebuilt on each call.
+  public Vector getUsers() {
+    listofusers = new Vector(); // was never allocated (NPE); a fresh list also avoids duplicates
+    for(int i=0; i<userid.length; i++) {
+      if(userid[i] == 1) listofusers.addElement(i);
+    }
+    return listofusers;
+  }
+
+  /**
+   * Counts the users flagged by setuser(); it no longer appends to listofusers.
+   */
+  public int numUsers() {
+    int count=0;
+    for(int i=0; i<userid.length; i++) {
+      if(userid[i] == 1) count++;
+    }
+    return count;
+  }
 }
index 62379c10dc71d8c55aa0dd0a31865e22b9940cd7..f2c12167dc1639891e5618aaa9d5bc3498226bd7 100644 (file)
@@ -17,6 +17,7 @@ public class Mail {
 
        String messageID; // cached message ID for reuse (takes a lot of memory and is used all over the place)
                       //same as hashcode of a class
+    boolean isSpam;
 
   public Mail() {
       messageID=null;
@@ -258,6 +259,14 @@ public class Mail {
     return returnStrings;
   }
 
+  public void setIsSpam(boolean spam) { // stores the spam flag of this mail
+    this.isSpam = spam;
+  }
+
+  public boolean getIsSpam() { // current value of the spam flag
+    return this.isSpam;
+  }
+
   public static void main(String[] args)
   {
     Mail mail = new Mail("./emails/email1");
@@ -269,5 +278,4 @@ public class Mail {
       System.out.println(b);
     }
   }
-
 }
index e0ffd26ae66c9f3dd4e35ae4b5b9d1be8efcfa01..2f5ab98107b0f8a1363f66e23ffc714352e0014f 100644 (file)
 public class SignatureComputer {
-       public EphemeralSignature sig4; //signature engines
-       public WhiplashSignature sig8; //signature engines
+  public EphemeralSignature sig4; //signature engines
+  public WhiplashSignature sig8; //signature engines
 
-       int[] enginesToUseForCheck;
+  int[] enginesToUseForCheck;
 
-       public SignatureComputer() {
-               sig4 = new EphemeralSignature(); //default values
-               sig8 = new WhiplashSignature();
-               createEnginesToCheck();
-       }
+  public SignatureComputer() {
+    sig4 = new EphemeralSignature(); //default values
+    sig8 = new WhiplashSignature();
+    createEnginesToCheck();
+  }
 
-       /**
-        * constructor to be used when some parsing has already taken place with the
-        * server-provides value <code>randomNumberSeed</code>.
-        
-        * @param randomNumberSeed
-        *        a non-negative number used for seeding the random number generator
-        *        before starting to hash values.
-        * @param separator
-        *        how the mail-text should be splitted into lines. (== what chars
-        *        separate 2 lines)
-        */
-       public SignatureComputer(int randomNumberSeed, String separator) {
-               sig4 = new EphemeralSignature(randomNumberSeed,separator);
-               sig8 = new WhiplashSignature();
-               createEnginesToCheck();
-       }
+  /**
+   * constructor to be used when some parsing has already taken place with the
+   * server-provides value <code>randomNumberSeed</code>.
+   * 
+   * @param randomNumberSeed
+   *        a non-negative number used for seeding the random number generator
+   *        before starting to hash values.
+   * @param separator
+   *        how the mail-text should be splitted into lines. (== what chars
+   *        separate 2 lines)
+   */
+  public SignatureComputer(int randomNumberSeed, String separator) {
+    sig4 = new EphemeralSignature(randomNumberSeed,separator);
+    sig8 = new WhiplashSignature();
+    createEnginesToCheck();
+  }
 
-       /**
-        * the constructor to be used most of the time. you can hand over the
-        * seed-string exactly as it is provided by the razor-server.
-        
-        * @param seedAndSeparator
-        *        a string containing the seed value for the RNG and a separator list
-        *        (separated by ' <b>- </b>'). default value is
-        *        <code>"7542-10"</code> which means server-seed 7542 and only one
-        *        separator 10 (which is ascii '\n').
-        */
-       public SignatureComputer(String seedAndSeparator) {
-               sig4 = new EphemeralSignature(seedAndSeparator);
-               sig8 = new WhiplashSignature();
-               createEnginesToCheck();
-       }
+  /**
+   * the constructor to be used most of the time. you can hand over the
+   * seed-string exactly as it is provided by the razor-server.
+   * 
+   * @param seedAndSeparator
+   *        a string containing the seed value for the RNG and a separator list
+   *        (separated by ' <b>- </b>'). default value is
+   *        <code>"7542-10"</code> which means server-seed 7542 and only one
+   *        separator 10 (which is ascii '\n').
+   */
+  public SignatureComputer(String seedAndSeparator) {
+    sig4 = new EphemeralSignature(seedAndSeparator);
+    sig8 = new WhiplashSignature();
+    createEnginesToCheck();
+  }
 
-       /**
-        
-        */
-       public void createEnginesToCheck() {
-               enginesToUseForCheck = new int[2];
-        enginesToUseForCheck[0] = 4; //Ephemeral engine
-        enginesToUseForCheck[1] = 8;//Whiplash engine
-       }
+  /**
+   * 
+   */
+  public void createEnginesToCheck() {
+    enginesToUseForCheck = new int[2];
+    enginesToUseForCheck[0] = 4; //Ephemeral engine
+    enginesToUseForCheck[1] = 8;//Whiplash engine
+  }
 
-       public boolean isSigSupported(int sig) {
-               boolean found = false;
-               for (int i = 0; i < enginesToUseForCheck.length && !found; i++) {
-                       if (enginesToUseForCheck[i] == sig) {
-                               found = true;
-                       }
-               }
-               return found;
-       }
+  public boolean isSigSupported(int sig) {
+    boolean found = false;
+    for (int i = 0; i < enginesToUseForCheck.length && !found; i++) {
+      if (enginesToUseForCheck[i] == sig) {
+        found = true;
+      }
+    }
+    return found;
+  }
 
-       public boolean isSigSupported(String sig) {
-               return (sig != null && isSigSupported(Integer.parseInt(sig)));
-       }
+  public boolean isSigSupported(String sig) {
+    return (sig != null && isSigSupported(Integer.parseInt(sig)));
+  }
 
-       public String getDefaultEngine() {
-               return "4";
-       }
+  public String getDefaultEngine() {
+    return "4";
+  }
 
-       public Vector computeSigs(StringBuffer[] EmailParts) {
-               if (EmailParts == null) return null;
+  /**
+   * Computes one printable signature per mail part and enabled engine.
+   *
+   * @param EmailParts vector of mail-part strings; null entries are skipped
+   * @return vector of "engineNo:signature" strings, or null if EmailParts is null
+   */
+  public Vector computeSigs(Vector EmailParts) {
+    if (EmailParts == null) return null;
 
-               Vector printableSigs = new Vector(); // vector of strings
-               for (int mailIndex = 0; mailIndex < EmailParts.length; mailIndex++) {
-                       StringBuffer mail = EmailParts[mailIndex];
+    Vector printableSigs = new Vector(); // vector of strings
+    for (int mailIndex = 0; mailIndex < EmailParts.size(); mailIndex++) {
+      // a raw Vector yields Object, so the element has to be cast back to String
+      String mail = (String) EmailParts.elementAt(mailIndex);
 
-                       if (mail == null) continue;
+      if (mail == null) continue;
 
-            /*
-             * Compute Sig for bodyparts that are cleaned.
-             */
-            for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) {
-              int engineNo = enginesToUseForCheck[engineIndex];
-              String sig = null;
+      /*
+       * Compute Sig for bodyparts that are cleaned.
+       */
+      for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) {
+        int engineNo = enginesToUseForCheck[engineIndex];
+        String sig = null;
 
-              switch (engineNo) {
-                case 4:
-                  sig = computeSignature(engineNo,mail.toString());
-                  break;
-                case 8:
-                  sig = computeSignature(engineNo,mail.toString());
-                  break;
-                default:
-                  System.out.println("Couldn't find the signature engine\n");
-                  //sig = computeSignature(engineNo,curPart.getCleaned());
-                  break;
-              }//switch engineNo
+        switch (engineNo) {
+          case 4:
+            sig = computeSignature(engineNo,mail);
+            break;
+          case 8:
+            sig = computeSignature(engineNo,mail);
+            break;
+          default:
+            System.out.println("Couldn't find the signature engine\n");
+            //sig = computeSignature(engineNo,curPart.getCleaned());
+            break;
+        }//switch engineNo
 
-              if (sig != null && sig.length > 0) {
-                String hash = engineNo + ":" + sig[curSigIndex];
-                printableSigs.add(hash);
-              } else {
-                /* we didn't produce a signature for the mail. */
-              }
-            }//engine
-        }//each emails part
-        return printableSigs;
-    }//computeSigs
+        // sig is a single String now: use length() and append it directly
+        if (sig != null && sig.length() > 0) {
+          String hash = engineNo + ":" + sig;
+          printableSigs.add(hash);
+        } else {
+          /* we didn't produce a signature for the mail. */
+        }
+      }//engine
+    }//each emails part
+    return printableSigs;
+  }//computeSigs
 
-       /**
-        * @param engineNo
-        * @param cleaned
-        * @return
-        */
-       private String[] computeSignature(int engineNo, String mail) {
-               switch (engineNo) {
-                       case 4:
-                               return new String[] { this.sig4.computeSignature(mail) };
-                       case 8:
-                //TODO device and equivalent for this
-                               //String cleanedButKeepHTML = Preprocessor.preprocess(mail,Preprocessor.ConfigParams.NO_DEHTML);
-                               return this.sig8.computeSignature(cleanedButKeepHTML);
-                       default:
-                               return null;
-               }
-       }
+  /**
+   * Computes the signature of one mail part with the requested engine.
+   *
+   * @param engineNo 4 selects sig4 (Ephemeral), 8 selects sig8 (Whiplash)
+   * @param mail text of the mail part to sign
+   * @return the engine's signature, or null for any other engine number
+   */
+  private String computeSignature(int engineNo, String mail) {
+    switch (engineNo) {
+      case 4:
+        // the engine already returns a String; "new String { ... }" was not valid Java
+        return this.sig4.computeSignature(mail);
+      case 8:
+        //TODO device and equivalent for this
+        //String cleanedButKeepHTML = Preprocessor.preprocess(mail,Preprocessor.ConfigParams.NO_DEHTML);
+        // Until that preprocessing exists the part is signed as-is.
+        // NOTE(review): assumes sig8.computeSignature(String) returns a String - confirm
+        return this.sig8.computeSignature(mail);
+      default:
+        return null;
+    }
+  }
 }
index e134e0423807ca88a4d8af9edf12bc6b7864c1c6..d690f25300f6d7f1525c07b6799f187bba2c5686 100644 (file)
@@ -33,8 +33,12 @@ public class SpamFilter extends Thread {
     for(int i=0; i<niter; i++) {
       for(int j=0; j<nemails; j++) {
         int pickemail = rand.nextInt(100);
-        //String email = getEmail(pickemail);
-        checkMail(email, thid);
+        //Mail email = getEmail(pickemail);
+        boolean filterAnswer = checkMail(email, thid);
+        boolean userAnswer = email.getIsSpam();
+        if(filterAnswer != userAnswer) {
+          sendFeedBack(email);
+        }
       }
     }
   }
@@ -108,27 +112,30 @@ public class SpamFilter extends Thread {
   }
 
   /**
-   *  Returns signatures to the Spam filter
+   *  Returns result to the Spam filter
    **/
-  public FilterResult[] checkMail(Mail mail, int userid) {
+  public boolean checkMail(Mail mail, int userid) {
     //Preprocess emails
-      //String[] partsOfMailStrings = createMailStrings();
-      //RazorMail[] razorMails = 
+    //Vector partsOfMailStrings = createMailStrings(mail);
+
     //Compute signatures
     SignatureComputer sigComp = new SignatureComputer();
     Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings
-          
+
     //check with global data structure
-    check(signatures, userid);
+    int[] confidenceVals = check(signatures,userid);
 
     //---- create and  return results --------
-    FilterResult[] filterResults = new FilterResult[mailStrings.length];
+    FilterResult filterResult = new FilterResult();
+    boolean spam = filterResult.getResult();
 
-    return filterResults;
+    return spam;
   } 
 
-  public void check(Vector emailParts, int userid) {
-    for(int i=0; i<emailParts.size(); i++) {
+  public int[] check(Vector emailParts, int userid) {
+    int numparts = emailParts.size();
+    int[] confidenceVals = new int[numparts];
+    for(int i=0; i<numparts; i++) {
       String part = (String)(emailParts.elementAt(i));
       char tmpengine = part.charAt(0);
       String engine =  new String(tmpengine);
@@ -138,80 +145,20 @@ public class SpamFilter extends Thread {
       myhe.setsig(signature);
       //find object in distributedhashMap: if no object then add object 
       //else read object
-      HashEntry tmphe;
-      if((tmphe=(HashEntry)mydhmap.get(myhe))== null) {
+      HashEntry tmphe= (HashEntry)(mydhmap.get(myhe));
+      if(tmphe == null) {
         //add new object
         myhe.stats = new HashStat();
-        myhe.stats.setuser(userid, 0, 0, 1);
+        myhe.stats.setuser(userid, 0, 0, -1);
+        FilterStatistic fs = new FilterStatistic(0,0,-1);
+        mydhmap.put(myhe, fs);
       } else {
-        //else if read object
-        Vector<String> enginesToSend = new Vector<String>();
-        Vector<String> sigsToSend = new Vector<String>();
-
-        for (RazorMail mail : razorMails) {
-          for (int partNr = 0; partNr < mail.getPartSize(); partNr++) {
-            Part part = mail.getPart(partNr);
-            if (part.skipMe()) {
-              continue;
-            }
-
-            for (Iterator<String> hashIter = part.getHashIterator(); hashIter.hasNext();) {
-              String curHash = (String)hashIter.next();
-              String[] engineHashSplit = curHash.split(":");
-              String engine = engineHashSplit[0];
-              String signature = engineHashSplit[1];
-              enginesToSend.add(engine);
-              sigsToSend.add(signature);
-            }
-          }
-        }
-
-        if (sigsToSend.size() == 0) { // nothing to send
-          return;
-        }
-
-        String[] enginesToSendArr = new String[enginesToSend.size()];
-        enginesToSend.toArray(enginesToSendArr);
-        String[] sigsToSendArr = new String[sigsToSend.size()];
-        sigsToSend.toArray(sigsToSendArr);
-
-        // ----- now connect to server and ask query -----
-        int[] confidenceVals = null;
-        RazorCommunicationEngine checkEngine = getCheckEngine();
-        try {
-          checkEngine.connect();
-          confidenceVals = checkEngine.askForSpam(sigsToSendArr,enginesToSendArr);
-          checkEngine.disconnect();
-        } finally {
-          checkEngines.add(checkEngine);
-        }
-
-        if (confidenceVals == null) {
-          System.err.println("check got no answer from server. error.");
-          return; // error
-        }
-
-        if (confidenceVals.length != sigsToSendArr.length) {
-          throw new IllegalStateException("We got not enough answers from server. expected: " + sigsToSendArr.length + "  received: " + confidenceVals.length);
-        }
-
-        // ----- now dispatch the answers to the mail objects -----
-        int answerIndex = 0;
-        for (RazorMail mail : razorMails) {
-          for (int partNr = 0; partNr < mail.getPartSize(); partNr++) {
-            Part part = mail.getPart(partNr);
-            if (part.skipMe()) {
-              continue;
-            }
-
-            for (Iterator<String> hashIter = part.getHashIterator(); hashIter.hasNext();) {
-              String curHash = hashIter.next();
-              part.setResponse(curHash,String.valueOf(confidenceVals[answerIndex++]));
-            }
-          }
-        }
-        //  --> after this loop the mail is able to determine if it is spam or not
+        // ----- now connect to global data structure and ask query -----
+        confidenceVals[i] = tmphe.askForSpam(numparts);
       }
     }
+
+    //  --> the mail client is able to determine if it is spam or not
+    return confidenceVals;
   }
 }