1 public class SpamFilter extends Thread {
2 DistributedHashMap mydhmap;
7 * Total number of iterations
12 * Total number of emails
17 * Total number of threads
// Constructor: stores the supplied configuration on this SpamFilter instance.
//   numiter  : number of iterations
//   numemail : number of emails
//   id       : identifier for this instance
//   mydhmap  : the DistributedHashMap this instance will query and update
//   nthreads : total number of threads
// The assignments visible here copy numemail, mydhmap and nthreads into the
// same-named fields.
// NOTE(review): no assignment of numiter or id is visible in this excerpt;
// presumably they are stored on elided lines -- confirm.
25 public SpamFilter(int numiter, int numemail,int id, DistributedHashMap mydhmap, int nthreads) {
27 this.numemail=numemail;
29 this.mydhmap = mydhmap;
30 this.nthreads = nthreads;
// NOTE(review): the header of the method that owns these statements is not
// visible in this excerpt -- presumably the thread body (run()); confirm.
// Visible behaviour: nested loops over niter and nemails pick an email at
// random, compute its signatures, look them up with check(), classify the
// result with FilterResult and compare that verdict with the label stored on
// the Mail object; a mismatch triggers sendFeedBack(). A summary line labelled
// with i is printed (its placement relative to the loops is not visible here).
// A fixed seed makes the random sequence repeatable for a given thid.
48 Random rand = new Random(thid);
51 for(i=0; i<niter; i++) {
54 for(int j=0; j<nemails; j++) {
// Index drawn from [0, nemails); the same email may be drawn more than once.
55 int pickemail = rand.nextInt(nemails);
57 // randomly pick emails
59 //System.out.println("pickemail= " + pickemail);
// The Mail is built from the string "emails/email<index>" (presumably a
// relative file path -- confirm against the Mail class).
60 Mail email = new Mail("emails/email"+pickemail);
61 Vector signatures = email.checkMail(thid);
63 //check with global data structure
64 int[] confidenceVals=null;
66 confidenceVals = check(signatures,thid);
69 //---- create and return results --------
70 FilterResult filterResult = new FilterResult();
71 boolean filterAnswer = filterResult.getResult(confidenceVals);
73 //---- get user's take on email and send feedback ------
// The "user's" answer is read from the Mail object itself.
74 boolean userAnswer = email.getIsSpam();
76 //System.out.println("userAnswer= " + userAnswer + " filterAnswer= " + filterAnswer);
78 if(filterAnswer != userAnswer) {
79 /* wrong answer from the spam filter */
// Feedback is sent only when the filter disagreed with the user.
82 sendFeedBack(signatures, userAnswer, thid, rand);
86 /* Correct answer from the spam filter */
// The "percentage" printed is the raw ratio correct/nemails (not scaled by 100).
92 System.out.println((i)+"th iteration correct = " + correct + " Wrong = " + wrong + " percentage = " + ((float)correct/(float)nemails));
// Program entry point.
//   args : command-line arguments, forwarded to parseCmdLine
// Visible steps: fill a table of eight packed IPv4 addresses, parse the command
// line into a scratch SpamFilter, allocate the shared DistributedHashMap and
// one SpamFilter per thread, then run a start loop and a join loop (both loop
// bodies are elided in this excerpt) and print "Finished".
// NOTE(review): `global new` is not standard Java -- presumably the allocation
// form of a distributed-shared-memory Java dialect; confirm the toolchain.
95 public static void main(String[] args) {
// Each entry packs four octets, most significant first: 128.195.136.162 .. 169.
// NOTE(review): no use of mid is visible in this excerpt; presumably it is
// consumed when the threads are started -- confirm.
96 int[] mid = new int[8];
97 mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc-1.calit2
98 mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc-2.calit2
99 mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc-3.calit2
100 mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-4.calit2
101 mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc-5.calit2
102 mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc-6.calit2
103 mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dc-7.calit2
104 mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dc-8.calit2
106 //Read options from command prompt
// sf is used only as a holder for the parsed option values (numiter, numemail,
// nthreads are read back from it below).
107 SpamFilter sf = new SpamFilter();
108 SpamFilter.parseCmdLine(args, sf);
109 int nthreads = sf.nthreads;
111 //Create Global data structure
112 DistributedHashMap dhmap;
// Constructor arguments are 10000 and 0.75f (presumably initial capacity and
// load factor -- confirm against DistributedHashMap).
115 dhmap = global new DistributedHashMap(10000, 0.75f);
// The declaration of spf is on a line elided from this excerpt.
118 spf = global new SpamFilter[nthreads];
119 for(int i=0; i<nthreads; i++) {
// Every worker receives the same dhmap reference and its own index i as id.
120 spf[i] = global new SpamFilter(sf.numiter, sf.numemail, i, dhmap, nthreads);
124 /* ---- Start Threads ---- */
126 for(int i = 0; i<nthreads; i++) {
133 /* ---- Join threads----- */
134 for(int i = 0; i<nthreads; i++) {
141 System.out.println("Finished");
// Parses command-line flags and stores their values on sf.
//   args : raw command-line arguments
//   sf   : SpamFilter whose numiter / numemail / nthreads fields are filled in
// Flags recognised in the visible code: -n (iterations), -e (emails),
// -t (threads) and -h (its body is elided here). Scanning stops at the first
// argument that does not start with "-".
// NOTE(review): `i` and `arg` are declared on lines elided from this excerpt;
// presumably arg = args[i++] at the top of the loop -- confirm.
// NOTE(review): new Integer(String) throws NumberFormatException on non-numeric
// input and is deprecated in current JDKs; Integer.parseInt is the usual
// replacement.
144 public static void parseCmdLine(String args[], SpamFilter sf) {
147 while (i < args.length && args[i].startsWith("-")) {
150 if(arg.equals("-n")) { //num of iterations
// Only read a value if one is actually present after the flag.
151 if(i < args.length) {
152 sf.numiter = new Integer(args[i++]).intValue();
154 } else if(arg.equals("-e")) { //num of emails
155 if(i < args.length) {
156 sf.numemail = new Integer(args[i++]).intValue();
158 } else if(arg.equals("-t")) { //num of threads
159 if(i < args.length) {
160 sf.nthreads = new Integer(args[i++]).intValue();
162 } else if(arg.equals("-h")) {
// Handles nthreads == 0; the action taken is on lines elided from this excerpt.
166 if(sf.nthreads == 0) {
172 * The usage routine describing the program
// Prints the command-line usage summary to standard output: one synopsis line
// (followed by an extra blank line because of the trailing "\n") and one line
// per option (-n, -e, -t). Takes no arguments and returns nothing.
174 public void usage() {
175 System.out.println("usage: ./spamfilter -n <num iterations> -e <num emails> -t <num threads>\n");
176 System.out.println( " -n : num iterations");
177 System.out.println( " -e : number of emails");
178 System.out.println( " -t : number of threads");
182 * Returns result to the Spam filter
// Classifies a single mail.
//   mail   : the mail to examine
//   userid : forwarded unchanged to check()
// Steps visible here: collect the mail's string parts, compute signatures for
// them, look the signatures up with check(), and turn the resulting confidence
// values into a boolean via FilterResult.getResult.
// NOTE(review): the return statement is elided from this excerpt; presumably
// the method returns `spam` -- confirm.
185 public boolean checkMail(Mail mail, int userid) {
187 //Vector partsOfMailStrings = mail.createMailStringsWithURL();
// The body string is appended to the Vector returned by getCommonPart().
// NOTE(review): whether getCommonPart() returns a fresh Vector or the mail's
// own internal one is not visible here; if the latter, this mutates the mail.
189 Vector partsOfMailStrings = mail.getCommonPart();
190 partsOfMailStrings.addElement(mail.getBodyString());
193 SignatureComputer sigComp = new SignatureComputer();
194 Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings
196 //check with global data structure
197 int[] confidenceVals = check(signatures,userid);
199 //---- create and return results --------
200 FilterResult filterResult = new FilterResult();
201 boolean spam = filterResult.getResult(confidenceVals);
// Looks up every signature in the shared hash map and returns one confidence
// value per signature.
//   signatures : Vector of Strings of the form "a:b" (a = one-character engine
//                id, b = signature text)
//   userid     : index used into the per-user arrays stored on each entry
//   returns    : int[] with signatures.size() elements
// For each signature the code computes a bucket index, compares the candidate
// entry `ptr` against the engine and signature strings and, on a match, stores
// tmpfs.getChecked() in the result. When nothing matched, a new entry is
// allocated and linked into the bucket.
// NOTE(review): the declaration of `ptr` and the loop that walks the bucket
// chain are on lines elided from this excerpt.
207 public int[] check(Vector signatures, int userid) {
210 //prefetch(this.mydhmap.table);
211 int numparts = signatures.size();
213 //System.out.println("check() numparts= " + numparts);
214 int[] confidenceVals = new int[numparts];
216 for(int i=0; i<numparts; i++) {
217 String part = (String)(signatures.elementAt(i));
218 char tmpengine = part.charAt(0);
// NOTE(review): enginestr is assigned only for '4' and '8' in the visible
// lines; unless an elided line covers other values it stays null and the
// enginestr.hashCode() call below would throw NullPointerException.
219 String enginestr=null;
220 if(tmpengine == '4') { //Ephemeral Signature calculator
221 enginestr = new String("4");
223 if(tmpengine == '8') { //Whiplash Signature calculator
224 enginestr = new String("8");
226 String signaturestr = new String(part.substring(2));//a:b index of a =0, index of : =1, index of b =2
228 //find object in distributedhashMap: if no object then add object
229 HashEntry tmphe=null;
// The key hash is the XOR of the two string hashes; hash1 maps it to a bucket.
230 int hashCode = enginestr.hashCode()^signaturestr.hashCode();
232 int index1 = mydhmap.hash1(hashCode, mydhmap.table.length);
235 //prefetch(mydhmap.table[index1].array.key.stats.userstat[userid],
236 // mydhmap.table[index1].array.value,
237 // mydhmap.table[index1].array.key.stats.userid,
238 // mydhmap.table[index1].array.key.engine.value,
239 // mydhmap.table[index1].array.key.signature.value);
241 DistributedHashEntry testhe = mydhmap.table[index1];
242 boolean foundstatistics=false;
246 //prefetch(testhe.array.next.value,
247 // testhe.array.next.key.engine.value,
248 // testhe.array.next.key.stats.userid,
249 // testhe.array.next.key.stats.userstat[userid],
250 // testhe.array.next.key.signature,value);
// Strings are compared character by character through their value/count/offset
// members using inLineEquals.
// NOTE(review): direct access to String.value/count/offset is not possible in
// standard Java; presumably this dialect's String exposes them -- confirm.
255 boolean engineVal= inLineEquals(ptr.key.engine.value, ptr.key.engine.count, ptr.key.engine.offset,
256 enginestr.value, enginestr.count, enginestr.offset);
257 boolean SignatureVal= inLineEquals(ptr.key.signature.value, ptr.key.signature.count, ptr.key.signature.offset,
258 signaturestr.value, signaturestr.count, signaturestr.offset);
// tmpuserid and myfs are read here but not used in any line visible in this
// excerpt.
260 FilterStatistic tmpfs = ptr.value;
261 int tmpuserid = ptr.key.stats.userid[userid];
262 FilterStatistic myfs = ptr.key.stats.userstat[userid];
// An entry matches only if the stored hash, the engine and the signature all agree.
264 if(ptr.hashval==hashCode&&engineVal&&SignatureVal) {
265 //Found statistics...get Checked value.
266 confidenceVals[i] = tmpfs.getChecked();
267 foundstatistics=true;
271 //prefetch(ptr.next.next.key.stats.userid,
272 // ptr.next.next.key.engine.value,
273 // ptr.next.next.key.signature.value,
274 // ptr.next.next.key.stats.userstat[userid],
275 // ptr.next.next.value);
// Miss path: build a new key/entry pair and link it into the bucket.
// NOTE(review): no assignment to confidenceVals[i] is visible on this path; if
// none is elided, the element keeps the int[] default of 0.
280 if (!foundstatistics) {
282 //prefetch(testhe.array);
283 HashEntry myhe = global new HashEntry();
284 GString engine = global new GString(enginestr);
285 GString signature = global new GString(signaturestr);
287 myhe.setengine(engine);
288 myhe.setsig(signature);
290 DHashEntry he = global new DHashEntry();
291 //application specific fields
292 HashStat mystat = global new HashStat();
293 mystat.setuser(userid, 0, 0, -1);
294 myhe.setstats(mystat);
295 FilterStatistic myfs = global new FilterStatistic(0,0,-1);
299 //link old element into chain
303 //splice into old list
// The new entry is placed in front of the bucket's existing chain.
304 he.next=testhe.array;
307 //create new header...this will cause many aborts
// The bucket slot is replaced with a freshly allocated header object.
308 DistributedHashEntry newhe=global new DistributedHashEntry();
310 mydhmap.table[index1]=newhe;
315 // --> the mail client is able to determine if it is spam or not
316 // --- According to the "any"-logic (in Core#check_logic) in original Razor ---
317 // If any answer is spam, the entire email is spam.
318 return confidenceVals;
322 * This method sends feedback from the user to a distributed
323 * spam database and trains the spam database to check future
324 * emails and detect spam
// Applies one user's spam/ham verdict to every signature of a mail.
//   signatures : Vector of Strings of the form "a:b" (engine id, signature)
//   isSpam     : the user's verdict for the mail
//   id         : index into the per-user arrays on each entry's stats
//   myrand     : random source used to decide whether the feedback is
//                deliberately wrong (5% of the time per the comments below)
// For each signature a lookup key is built, the matching bucket is searched,
// the user is marked on the entry's stats if not already marked, and the spam
// or ham counter is incremented.
// NOTE(review): the conditions selecting between the increment calls, and the
// code that assigns tmphe/fs on a match, are on lines elided from this excerpt.
326 public void sendFeedBack(Vector signatures, boolean isSpam, int id, Random myrand) {
328 for(int i=0;i<signatures.size();i++) {
329 String part = (String)(signatures.elementAt(i));
331 // Signature is of form a:b
332 // where a = string representing a signature engine
334 // b = string representing signature
336 char tmpengine = part.charAt(0); //
// `engine` is declared on a line elided from this excerpt; it is assigned only
// for engine ids '4' and '8' in the visible code.
340 if(tmpengine == '4') {
341 String tmpstr = new String("4");
342 engine = global new GString(tmpstr);
345 if(tmpengine == '8') {
346 String tmpstr = new String("8");
347 engine = global new GString(tmpstr);
350 //System.out.println("sendFeedBack(): engine= " + engine.toLocalString());
352 String tmpsig = new String(part.substring(2));
353 GString signature = global new GString(tmpsig);
355 //System.out.println("sendFeedBack(): signature= " + signature.toLocalString());
// myhe serves as the lookup key: it supplies the hash code and the strings
// compared below.
357 HashEntry myhe = global new HashEntry();
358 myhe.setengine(engine);
359 myhe.setsig(signature);
362 // ----- now connect to global data structure and update stats -----
// NOTE(review): tmphe starts as null and is dereferenced further down; if the
// elided search finds no matching entry this would throw NullPointerException.
363 HashEntry tmphe=null;
364 FilterStatistic fs=null;
365 int hashCode = myhe.hashCode();
366 int index1 = mydhmap.hash1(hashCode, mydhmap.table.length);
367 DistributedHashEntry testhe = mydhmap.table[index1];
369 DHashEntry ptr=testhe.array;
371 boolean engineVal= inLineEquals(ptr.key.engine.value, ptr.key.engine.count, ptr.key.engine.offset,
372 myhe.engine.value, myhe.engine.count, myhe.engine.offset);
373 boolean SignatureVal= inLineEquals(ptr.key.signature.value, ptr.key.signature.count, ptr.key.signature.offset,
374 myhe.signature.value, myhe.signature.count, myhe.signature.offset);
// Same three-way match test as in check(): stored hash, engine and signature.
376 if(ptr.hashval==hashCode&&engineVal&&SignatureVal) {
384 //tmphe has the key at the end
385 //fs has the value at the end
// Mark this user on the entry unless userid[id] already equals 1.
391 if(tmphe.stats.userid[id] != 1) {
392 tmphe.stats.setuserid(id);
396 //---- get value from distributed hash and update spam count
398 //Allow users to give incorrect feedback
// Despite its name, pickemail here is a roll in [0, 100) used for the
// correct/incorrect feedback decision, not an email index.
399 int pickemail = myrand.nextInt(100);
400 /* Randomly allow user to provide incorrect feedback */
402 //give correct feedback 95% of times
403 //Increment spam or ham value
405 tmphe.stats.incSpamCount(id);
408 tmphe.stats.incHamCount(id);
412 // Give incorrect feedback 5% of times
// The two increments appear in the opposite order from the branch above.
414 tmphe.stats.incHamCount(id);
417 tmphe.stats.incSpamCount(id);
422 }//end of sendFeedback
424 public static boolean inLineEquals(char[] array1, int count1, int offset1, char[] array2, int count2, int offset2) {
427 for(int i=0; i<count1; i++) {
428 if(array1[i+offset1] != array2[i+offset2]) {