1 public class SpamFilter extends Thread {
2 DistributedHashMap mydhmap;
7 * Total number of iterations
12 * Total number of emails
17 * Total number of threads
// Constructs one spam-filter worker from the run parameters and the hash map
// it will consult. The assignments visible here copy numemail, mydhmap and
// nthreads into the same-named fields.
// NOTE(review): the handling of numiter and id is not visible in this excerpt --
// confirm they are stored in the fields the thread body reads.
25 public SpamFilter(int numiter, int numemail,int id, DistributedHashMap mydhmap, int nthreads) {
27 this.numemail=numemail;
29 this.mydhmap = mydhmap;
30 this.nthreads = nthreads;
// Worker loop (the enclosing method header is not visible in this excerpt;
// presumably the Thread entry point -- confirm).
// For each of niter iterations it processes nemails emails: pick an email,
// compute its signatures, look them up through check(), turn the confidence
// values into a verdict, and compare that verdict with the email's own
// spam flag.
// rand is seeded with thid and myrand with the constant 0; myrand is not
// used in any line visible here.
45 Random rand = new Random(thid);
46 Random myrand = new Random(0);
48 for(int i=0; i<niter; i++) {
51 for(int j=0; j<nemails; j++) {
// Index in [0, 100) selecting which "emails/email<N>" file to load.
52 int pickemail = rand.nextInt(100);
54 //System.out.println("pickemail= " + pickemail);
56 // randomly pick emails
58 Mail email = new Mail("emails/email"+pickemail);
59 Vector signatures = email.checkMail(thid);
61 //check with global data structure
62 int[] confidenceVals=null;
64 confidenceVals = check(signatures,thid);
// Prints one confidence value per signature.
68 for(int k=0; k<signatures.size();k++) {
69 System.out.println("confidenceVals["+k+"]= "+confidenceVals[k]);
73 //---- create and return results --------
74 FilterResult filterResult = new FilterResult();
75 boolean filterAnswer = filterResult.getResult(confidenceVals);
77 //---- get user's take on email and send feedback ------
78 boolean userAnswer = email.getIsSpam();
80 // System.out.println("userAnswer= " + userAnswer + " filterAnswer= " + filterAnswer);
// Feedback is sent inside the branch where the filter's verdict differs from
// the email's own flag.
// NOTE(review): the other statements of this branch, and any else branch, are
// not visible here -- confirm where correct/wrong are updated.
82 if(filterAnswer != userAnswer) {
85 sendFeedBack(signatures, userAnswer, thid);
// Summary line: "percentage" is printed as the ratio correct/nemails, i.e. a
// fraction rather than a value scaled by 100.
91 System.out.println((i+1)+"th iteration correct = " + correct + " Wrong = " + wrong + " percentage = " + ((float)correct/(float)nemails));
// Program entry point: reads the options, allocates the shared hash map and
// one SpamFilter per thread, then (in loops whose bodies are not visible in
// this excerpt) starts and joins the threads before printing "Finished".
95 public static void main(String[] args) {
// Eight machine ids, each an IPv4 address packed into one int, most
// significant octet first: 128.195.136.162 through 128.195.136.169.
// NOTE(review): mid is not used in any line visible here; presumably it is
// consumed by the elided thread-start loop -- confirm.
96 int[] mid = new int[8];
97 mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc-1.calit2
98 mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc-2.calit2
99 mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc-3.calit2
100 mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-4.calit2
101 mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc-5.calit2
102 mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc-6.calit2
103 mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dc-7.calit2
104 mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dc-8.calit2
106 //Read options from command prompt
// sf is a throwaway instance used only to carry the parsed option values.
107 SpamFilter sf = new SpamFilter();
108 SpamFilter.parseCmdLine(args, sf);
109 int nthreads = sf.nthreads;
111 //Create Global data structure
112 DistributedHashMap dhmap;
// `global new` is not standard Java. NOTE(review): presumably it allocates the
// object in the shared/distributed heap of this dialect, and 500 / 0.75f are
// presumably an initial capacity and a load factor -- confirm both.
115 dhmap = global new DistributedHashMap(500, 0.75f);
// One SpamFilter per thread; every instance receives the same dhmap and its
// loop index as id.
118 spf = global new SpamFilter[nthreads];
119 for(int i=0; i<nthreads; i++) {
120 spf[i] = global new SpamFilter(sf.numiter, sf.numemail, i, dhmap, nthreads);
124 /* ---- Start Threads ---- */
126 for(int i = 0; i<nthreads; i++) {
133 /* ---- Join threads----- */
134 for(int i = 0; i<nthreads; i++) {
141 System.out.println("Finished");
// Parses the leading "-" options in args and stores their values in sf:
//   -n <int>  -> sf.numiter
//   -e <int>  -> sf.numemail
//   -t <int>  -> sf.nthreads
//   -h        -> branch body not visible in this excerpt
// Parsing stops at the first argument that does not start with "-".
// NOTE(review): the declarations of i and arg are not visible here; presumably
// arg = args[i++] at the top of the loop -- confirm.
144 public static void parseCmdLine(String args[], SpamFilter sf) {
147 while (i < args.length && args[i].startsWith("-")) {
150 if(arg.equals("-n")) { //num of iterations
// The bounds check guards against an option that is the last argument and
// therefore has no value following it.
151 if(i < args.length) {
152 sf.numiter = new Integer(args[i++]).intValue();
154 } else if(arg.equals("-e")) { //num of emails
155 if(i < args.length) {
156 sf.numemail = new Integer(args[i++]).intValue();
158 } else if(arg.equals("-t")) { //num of threads
159 if(i < args.length) {
160 sf.nthreads = new Integer(args[i++]).intValue();
162 } else if(arg.equals("-h")) {
// A thread count of 0 is tested after the option loop; the action taken is
// not visible in this excerpt.
166 if(sf.nthreads == 0) {
172 * The usage routine describing the program
// Prints the command-line synopsis and a one-line description of each of the
// -n, -e and -t options to standard output.
174 public void usage() {
175 System.out.println("usage: ./spamfilter -n <num iterations> -e <num emails> -t <num threads>\n");
176 System.out.println( " -n : num iterations");
177 System.out.println( " -e : number of emails");
178 System.out.println( " -t : number of threads");
182 * Returns result to the Spam filter
// Classifies one mail for the given user: gathers the mail's text parts,
// computes a signature for them, looks the signatures up through check(),
// and converts the resulting confidence values into a boolean verdict.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the method returns `spam` -- confirm.
185 public boolean checkMail(Mail mail, int userid) {
187 //Vector partsOfMailStrings = mail.createMailStringsWithURL();
// The body string is appended to the Vector returned by getCommonPart().
// NOTE(review): if getCommonPart() returns the mail's internal Vector rather
// than a copy, this append mutates the mail -- confirm.
189 Vector partsOfMailStrings = mail.getCommonPart();
190 partsOfMailStrings.addElement(mail.getBodyString());
193 SignatureComputer sigComp = new SignatureComputer();
194 Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings
196 //check with global data structure
197 int[] confidenceVals = check(signatures,userid);
199 //---- create and return results --------
200 FilterResult filterResult = new FilterResult();
201 boolean spam = filterResult.getResult(confidenceVals);
// Looks up every signature in mydhmap and returns one confidence value per
// signature, in the same order as the input Vector.
//   signatures: Vector of Strings of the form "a:b", where the first character
//               identifies the signature engine and the text from index 2
//               onward is the signature itself.
//   userid:     id recorded in the statistics of a newly created entry.
// A signature that is not yet in the map is inserted with fresh statistics and
// gets confidence 0; otherwise the confidence is the stored
// FilterStatistic's getChecked() value.
207 public int[] check(Vector signatures, int userid) {
208 int numparts = signatures.size();
210 //System.out.println("check() numparts= " + numparts);
212 int[] confidenceVals = new int[numparts];
213 for(int i=0; i<numparts; i++) {
214 String part = (String)(signatures.elementAt(i));
215 char tmpengine = part.charAt(0);
// Only engine ids '4' and '8' are handled in the lines visible here.
// NOTE(review): the declaration and initial value of `engine` are not visible --
// confirm what it holds for any other leading character.
217 if(tmpengine == '4') { //Ephemeral Signature calculator
218 String tmpstr = new String("4");
219 engine = global new GString(tmpstr);
221 if(tmpengine == '8') { //Whiplash Signature calculator
222 String tmpstr = new String("8");
223 engine = global new GString(tmpstr);
226 //System.out.println("check(): engine= " + engine.toLocalString());
228 String str = new String(part.substring(2));//a:b index of a =0, index of : =1, index of b =2
229 GString signature = global new GString(str);
// Probe key built from (engine, signature), used for the map lookup below.
230 HashEntry myhe = global new HashEntry();
231 myhe.setengine(engine);
232 myhe.setsig(signature);
234 //find object in distributedhashMap: if no object then add object
235 if(!mydhmap.containsKey(myhe)) {
// First sighting: attach new per-user stats to the key and store it with a
// FilterStatistic(0,0,-1).
237 HashStat mystat = global new HashStat();
238 mystat.setuser(userid, 0, 0, -1);
239 myhe.setstats(mystat);
240 FilterStatistic fs = global new FilterStatistic(0,0,-1);
241 mydhmap.put(myhe, fs);
242 confidenceVals[i] = 0;
243 } else { //read existing object
244 // ----- now connect to global data structure and ask for spam -----
// getKey returns the key object already stored in the map; that stored key,
// not the probe, is then used to fetch the value.
245 HashEntry tmphe = (HashEntry)(mydhmap.getKey(myhe));
246 FilterStatistic fs = (FilterStatistic) (mydhmap.get(tmphe)); //get the value from hash
248 //System.out.println(fs.toString()+"\n");
250 confidenceVals[i] = fs.getChecked();
254 // --> the mail client is able to determine if it is spam or not
255 // --- According to the "any"-logic (in Core#check_logic) in original Razor ---
256 // If any answer is spam, the entire email is spam.
257 return confidenceVals;
261 * This method sends feedback from the user to a distributed
262 * spam database and trains the spam database to check future
263 * emails and detect spam
// Records feedback for every signature of one email: rebuilds the
// (engine, signature) key, fetches the stored key from mydhmap, marks user
// `id` on its stats if not already marked, and increments a spam or ham
// counter for that user.
// NOTE(review): the condition choosing between incSpamCount and incHamCount is
// not visible in this excerpt; presumably it tests isSpam -- confirm.
265 public void sendFeedBack(Vector signatures, boolean isSpam, int id) {
267 for(int i=0;i<signatures.size();i++) {
268 String part = (String)(signatures.elementAt(i));
270 // Signature is of form a:b
271 // where a = string representing a signature engine
273 // b = string representing signature
275 char tmpengine = part.charAt(0); //
// Only engine ids '4' and '8' are handled in the lines visible here; the
// declaration of `engine` is not visible.
279 if(tmpengine == '4') {
280 String tmpstr = new String("4");
281 engine = global new GString(tmpstr);
284 if(tmpengine == '8') {
285 String tmpstr = new String("8");
286 engine = global new GString(tmpstr);
289 //System.out.println("sendFeedBack(): engine= " + engine.toLocalString());
// Signature text starts at index 2, after the engine character and the ':'.
291 String tmpsig = new String(part.substring(2));
292 GString signature = global new GString(tmpsig);
294 //System.out.println("sendFeedBack(): signature= " + signature.toLocalString());
296 HashEntry myhe = global new HashEntry();
297 myhe.setengine(engine);
298 myhe.setsig(signature);
300 // ----- now connect to global data structure and update stats -----
// NOTE(review): tmphe is dereferenced with no null check visible here; this
// relies on the entry already being in the map -- confirm getKey cannot return
// null on this path.
301 HashEntry tmphe = (HashEntry)(mydhmap.getKey(myhe));
302 if(tmphe.stats.userid[id] != 1) {
303 tmphe.stats.setuserid(id);
306 //---- get value from distributed hash and update spam count
// The value is fetched with the probe key myhe here, whereas check() fetches
// it with the stored key returned by getKey.
307 FilterStatistic fs = (FilterStatistic) (mydhmap.get(myhe));
309 //System.out.println(fs.toString());
311 //TODO: Allow users to give incorrect feedback
313 //Increment spam or ham value
315 tmphe.stats.incSpamCount(id);
318 tmphe.stats.incHamCount(id);