From 7b35bfd4a9908ca197335e0d168f47bcb99255bc Mon Sep 17 00:00:00 2001 From: adash Date: Sun, 1 Nov 2009 02:28:41 +0000 Subject: [PATCH] more changes --- .../Distributed/SpamFilter/FilterResult.java | 122 ++++++---- .../SpamFilter/FilterStatistic.java | 99 ++++---- .../Distributed/SpamFilter/HashEntry.java | 10 + .../Distributed/SpamFilter/HashStat.java | 22 ++ .../Distributed/SpamFilter/Mail.java | 10 +- .../SpamFilter/SignatureComputer.java | 226 +++++++++--------- .../Distributed/SpamFilter/SpamFilter.java | 111 +++------ 7 files changed, 310 insertions(+), 290 deletions(-) diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/FilterResult.java b/Robust/src/Benchmarks/Distributed/SpamFilter/FilterResult.java index 25a98ac4..9eb03c52 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/FilterResult.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/FilterResult.java @@ -2,53 +2,87 @@ * A FilterResult encapsulates the result of a filter made by checking a mail. **/ public class FilterResult { - /** - * This value is used if type is ERROR or UNKNOWN. - */ - public double NO_RESULT; - - /** - * A result value greater or equal this value indicates that the filter has - * decided on spam. - */ - public double SPAM_THRESHOLD; - public double ABSOLUTE_SPAM; - public double ABSOLUTE_HAM; - - //TODO decide a good way of deciding - public double result; // the result, a value between 0 (ham) and 1 (spam), negative values for "error", "unknown" etc. - - //public HashMap properties = new HashMap(); // additional properties of the filter (mainly for statistics) - - // ----------------------------------------------------------------------------- - - public FilterResult(double result) { - SPAM_THRESHOLD=0.5; - ABSOLUTE_SPAM=1.0; - ABSOLUTE_HAM=0.0; - NO_RESULT=-1; - this.result = result; - } + /** + * This value is used if type is ERROR or UNKNOWN. + */ + public double NO_RESULT; + + /** + * A result value greater or equal this value indicates that the filter has + * decided on spam. + */ + public int SPAM_THRESHOLD; + public int ABSOLUTE_SPAM; + public int ABSOLUTE_HAM; + + //public double result; // the result, a value between -1 (ham) and 1000 (spam), + // negative values for "error", "unknown" etc. + + // ----------------------------------------------------------------------------- + + public FilterResult(double result) { + SPAM_THRESHOLD=500; + ABSOLUTE_SPAM=1000; + ABSOLUTE_HAM=0; + NO_RESULT=-1; + this.result = result; + } - public double getResult() { - return result; - } + public FilterResult() { + SPAM_THRESHOLD=500; + ABSOLUTE_SPAM=1000; + ABSOLUTE_HAM=0; + NO_RESULT=-1; + } + + public double getResult() { + return result; + } + + public boolean isSpam() { + return result >= SPAM_THRESHOLD; + } + + public boolean getResult(int[] confidenceVals) { + int[] res = new int[3]; + for(int i=0; i= 0 && confidenceVals[i] < 500) + res[1]+=1; //ham + if(confidenceVals[i] > SPAM_THRESHOLD) + res[2]+=1;//spam + } + int maxVotes=0; + int max; + for(int i=0; i<3;i++) { + if(res[i] > maxVotes) { + maxVotes = res[i]; + max = i; + } + } + if(i==0) + return false; + if(i==1) + return false; + if(i==2) + return true; - public boolean isSpam() { - return result >= SPAM_THRESHOLD; - } + System.out.println("Err: getResult() Shouldn't come here\n"); + return false; + } - /* - public void addProperty(String key, String value) { - properties.put(key,value); - } + /* + public void addProperty(String key, String value) { + properties.put(key,value); + } - public String getProperty(String key) { - return properties.get(key); - } + public String getProperty(String key) { + return properties.get(key); + } - public HashMap getProperties() { - return properties; - } - */ + public HashMap getProperties() { + return properties; + } + */ } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/FilterStatistic.java b/Robust/src/Benchmarks/Distributed/SpamFilter/FilterStatistic.java index 0a920c9f..c50b7023 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/FilterStatistic.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/FilterStatistic.java @@ -1,66 +1,65 @@ public class FilterStatistic { - int unknown; - int spam; - int ham; + int unknown; + int spam; + int ham; - // ------------------------------------------------------- - - public FilterStatistic() { - this(0,0,0); - } + // ------------------------------------------------------- - public FilterStatistic(int spam, int ham, int unknown) { - this.spam = spam; - this.ham = ham; - this.unknown = unknown; - } + public FilterStatistic() { + this(0,0,0); + } - public int getChecked() { - //TODO Change this formula - return getSpam() + getHam() + getUnknown(); - } + public FilterStatistic(int spam, int ham, int unknown) { + this.spam = spam; + this.ham = ham; + this.unknown = unknown; + } - public int getHam() { - return ham; - } + public int getChecked() { + return getSpam() + getHam() + getUnknown(); + } - public int getSpam() { - return spam; - } + public int getHam() { + return ham; + } - public String getName() { - return name; - } + public int getSpam() { + return spam; + } - public void setHam(int i) { - ham = i; - } + public String getName() { + return name; + } - public void setSpam(int i) { - spam = i; - } + public void setHam(int i) { + ham = i; + } - public int getUnknown() { - return unknown; - } + public void setSpam(int i) { + spam = i; + } - public void setUnknown(int u) { - unknown = u; - } + public int getUnknown() { + return unknown; + } - public void increaseSpam() { - setSpam(getSpam() + 1); - } + public void setUnknown(int u) { + unknown = u; + } - public void increaseHam() { - setHam(getHam() + 1); - } + public void increaseSpam() { + setSpam(getSpam() + 1); + } - public void increaseUnknown() { - setUnknown(getUnknown() + 1); - } + public void increaseHam() { + setHam(getHam() + 1); + } - public void setName(String name) { - this.name = name; - } + public void increaseUnknown() { + setUnknown(getUnknown() + 1); + } + + public void setName(String name) { + this.name = name; + } } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java b/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java index c3014f58..95cfe40a 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/HashEntry.java @@ -55,4 +55,14 @@ public class HashEntry { // return false; return true; } + + public int askForSpam() { + Vector users = stats.getUsers(); + int spamConfidence=0; + for(int i=0; irandomNumberSeed. - * - * @param randomNumberSeed - * a non-negative number used for seeding the random number generator - * before starting to hash values. - * @param separator - * how the mail-text should be splitted into lines. (== what chars - * separate 2 lines) - */ - public SignatureComputer(int randomNumberSeed, String separator) { - sig4 = new EphemeralSignature(randomNumberSeed,separator); - sig8 = new WhiplashSignature(); - createEnginesToCheck(); - } + /** + * constructor to be used when some parsing has already taken place with the + * server-provides value randomNumberSeed. + * + * @param randomNumberSeed + * a non-negative number used for seeding the random number generator + * before starting to hash values. + * @param separator + * how the mail-text should be splitted into lines. (== what chars + * separate 2 lines) + */ + public SignatureComputer(int randomNumberSeed, String separator) { + sig4 = new EphemeralSignature(randomNumberSeed,separator); + sig8 = new WhiplashSignature(); + createEnginesToCheck(); + } - /** - * the constructor to be used most of the time. you can hand over the - * seed-string exactly as it is provided by the razor-server. - * - * @param seedAndSeparator - * a string containing the seed value for the RNG and a separator list - * (separated by ' - '). default value is - * "7542-10" which means server-seed 7542 and only one - * separator 10 (which is ascii '\n'). - */ - public SignatureComputer(String seedAndSeparator) { - sig4 = new EphemeralSignature(seedAndSeparator); - sig8 = new WhiplashSignature(); - createEnginesToCheck(); - } + /** + * the constructor to be used most of the time. you can hand over the + * seed-string exactly as it is provided by the razor-server. + * + * @param seedAndSeparator + * a string containing the seed value for the RNG and a separator list + * (separated by ' - '). default value is + * "7542-10" which means server-seed 7542 and only one + * separator 10 (which is ascii '\n'). + */ + public SignatureComputer(String seedAndSeparator) { + sig4 = new EphemeralSignature(seedAndSeparator); + sig8 = new WhiplashSignature(); + createEnginesToCheck(); + } - /** - * - */ - public void createEnginesToCheck() { - enginesToUseForCheck = new int[2]; - enginesToUseForCheck[0] = 4; //Ephemeral engine - enginesToUseForCheck[1] = 8;//Whiplash engine - } + /** + * + */ + public void createEnginesToCheck() { + enginesToUseForCheck = new int[2]; + enginesToUseForCheck[0] = 4; //Ephemeral engine + enginesToUseForCheck[1] = 8;//Whiplash engine + } - public boolean isSigSupported(int sig) { - boolean found = false; - for (int i = 0; i < enginesToUseForCheck.length && !found; i++) { - if (enginesToUseForCheck[i] == sig) { - found = true; - } - } - return found; - } + public boolean isSigSupported(int sig) { + boolean found = false; + for (int i = 0; i < enginesToUseForCheck.length && !found; i++) { + if (enginesToUseForCheck[i] == sig) { + found = true; + } + } + return found; + } - public boolean isSigSupported(String sig) { - return (sig != null && isSigSupported(Integer.parseInt(sig))); - } + public boolean isSigSupported(String sig) { + return (sig != null && isSigSupported(Integer.parseInt(sig))); + } - public String getDefaultEngine() { - return "4"; - } + public String getDefaultEngine() { + return "4"; + } - public Vector computeSigs(StringBuffer[] EmailParts) { - if (EmailParts == null) return null; + public Vector computeSigs(Vector EmailParts) { + if (EmailParts == null) return null; - Vector printableSigs = new Vector(); // vector of strings - for (int mailIndex = 0; mailIndex < EmailParts.length; mailIndex++) { - StringBuffer mail = EmailParts[mailIndex]; + Vector printableSigs = new Vector(); // vector of strings + for (int mailIndex = 0; mailIndex < EmailParts.size(); mailIndex++) { + String mail = EmailParts.elementAt(mailIndex); - if (mail == null) continue; + if (mail == null) continue; - /* - * Compute Sig for bodyparts that are cleaned. - */ - for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) { - int engineNo = enginesToUseForCheck[engineIndex]; - String sig = null; + /* + * Compute Sig for bodyparts that are cleaned. + */ + for (int engineIndex = 0; engineIndex < enginesToUseForCheck.length; engineIndex++) { + int engineNo = enginesToUseForCheck[engineIndex]; + String sig = null; - switch (engineNo) { - case 4: - sig = computeSignature(engineNo,mail.toString()); - break; - case 8: - sig = computeSignature(engineNo,mail.toString()); - break; - default: - System.out.println("Couldn't find the signature engine\n"); - //sig = computeSignature(engineNo,curPart.getCleaned()); - break; - }//switch engineNo + switch (engineNo) { + case 4: + sig = computeSignature(engineNo,mail); + break; + case 8: + sig = computeSignature(engineNo,mail); + break; + default: + System.out.println("Couldn't find the signature engine\n"); + //sig = computeSignature(engineNo,curPart.getCleaned()); + break; + }//switch engineNo - if (sig != null && sig.length > 0) { - String hash = engineNo + ":" + sig[curSigIndex]; - printableSigs.add(hash); - } else { - /* we didn't produce a signature for the mail. */ - } - }//engine - }//each emails part - return printableSigs; - }//computeSigs + if (sig != null && sig.length > 0) { + String hash = engineNo + ":" + sig[curSigIndex]; + printableSigs.add(hash); + } else { + /* we didn't produce a signature for the mail. */ + } + }//engine + }//each emails part + return printableSigs; + }//computeSigs - /** - * @param engineNo - * @param cleaned - * @return - */ - private String[] computeSignature(int engineNo, String mail) { - switch (engineNo) { - case 4: - return new String[] { this.sig4.computeSignature(mail) }; - case 8: - //TODO device and equivalent for this - //String cleanedButKeepHTML = Preprocessor.preprocess(mail,Preprocessor.ConfigParams.NO_DEHTML); - return this.sig8.computeSignature(cleanedButKeepHTML); - default: - return null; - } - } + /** + * @param engineNo + * @param cleaned + * @return + */ + private String computeSignature(int engineNo, String mail) { + switch (engineNo) { + case 4: + return new String { this.sig4.computeSignature(mail) }; + case 8: + //TODO device and equivalent for this + //String cleanedButKeepHTML = Preprocessor.preprocess(mail,Preprocessor.ConfigParams.NO_DEHTML); + return this.sig8.computeSignature(cleanedButKeepHTML); + default: + return null; + } + } } diff --git a/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java b/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java index e134e042..d690f253 100644 --- a/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java +++ b/Robust/src/Benchmarks/Distributed/SpamFilter/SpamFilter.java @@ -33,8 +33,12 @@ public class SpamFilter extends Thread { for(int i=0; i enginesToSend = new Vector(); - Vector sigsToSend = new Vector(); - - for (RazorMail mail : razorMails) { - for (int partNr = 0; partNr < mail.getPartSize(); partNr++) { - Part part = mail.getPart(partNr); - if (part.skipMe()) { - continue; - } - - for (Iterator hashIter = part.getHashIterator(); hashIter.hasNext();) { - String curHash = (String)hashIter.next(); - String[] engineHashSplit = curHash.split(":"); - String engine = engineHashSplit[0]; - String signature = engineHashSplit[1]; - enginesToSend.add(engine); - sigsToSend.add(signature); - } - } - } - - if (sigsToSend.size() == 0) { // nothing to send - return; - } - - String[] enginesToSendArr = new String[enginesToSend.size()]; - enginesToSend.toArray(enginesToSendArr); - String[] sigsToSendArr = new String[sigsToSend.size()]; - sigsToSend.toArray(sigsToSendArr); - - // ----- now connect to server and ask query ----- - int[] confidenceVals = null; - RazorCommunicationEngine checkEngine = getCheckEngine(); - try { - checkEngine.connect(); - confidenceVals = checkEngine.askForSpam(sigsToSendArr,enginesToSendArr); - checkEngine.disconnect(); - } finally { - checkEngines.add(checkEngine); - } - - if (confidenceVals == null) { - System.err.println("check got no answer from server. error."); - return; // error - } - - if (confidenceVals.length != sigsToSendArr.length) { - throw new IllegalStateException("We got not enough answers from server. expected: " + sigsToSendArr.length + " received: " + confidenceVals.length); - } - - // ----- now dispatch the answers to the mail objects ----- - int answerIndex = 0; - for (RazorMail mail : razorMails) { - for (int partNr = 0; partNr < mail.getPartSize(); partNr++) { - Part part = mail.getPart(partNr); - if (part.skipMe()) { - continue; - } - - for (Iterator hashIter = part.getHashIterator(); hashIter.hasNext();) { - String curHash = hashIter.next(); - part.setResponse(curHash,String.valueOf(confidenceVals[answerIndex++])); - } - } - } - // --> after this loop the mail is able to determine if it is spam or not + // ----- now connect to global data structure and ask query ----- + confidenceVals[i] = tmphe.askForSpam(numparts); } } + + // --> the mail client is able to determine if it is spam or not + return confidenceVals; } } -- 2.34.1