// Spam-filter worker: a Thread subclass that holds a reference to a
// DistributedHashMap (mydhmap) which check() and sendFeedBack() query.
// NOTE(review): the embedded line numbers skip, so some field declarations
// and comment delimiters are not part of this listing.
1 public class SpamFilter extends Thread {
2 DistributedHashMap mydhmap;
7 * Total number of iterations
12 * Total number of emails
17 * Total number of threads
// Builds one worker.
//   numiter  - iteration count (main passes the value parsed from "-n")
//   numemail - email count, stored in this.numemail
//   id       - worker index (main passes the loop index i); its assignment is
//              not among the lines shown -- TODO confirm where it is stored
//   mydhmap  - map stored in this.mydhmap; main passes the same map to every worker
//   nthreads - thread count, stored in this.nthreads
25 public SpamFilter(int numiter, int numemail,int id, DistributedHashMap mydhmap, int nthreads) {
27 this.numemail=numemail;
29 this.mydhmap = mydhmap;
30 this.nthreads = nthreads;
// Worker loop. The method header is not among the lines shown; presumably this
// is the body of run() -- TODO confirm. niter, nemails and thid are also
// declared/assigned in lines not shown.
// The generator is seeded with the constant 0, so every worker draws the same
// sequence of email indices.
43 Random rand = new Random(0);
45 for(int i=0; i<niter; i++) {
46 for(int j=0; j<nemails; j++) {
// NOTE(review): the index is drawn from [0,100) independent of the configured
// email count -- confirm that 100 matches the files available under emails/.
47 int pickemail = rand.nextInt(100);
48 Mail email = new Mail("emails/email"+pickemail);
49 //Mail email = getEmail(pickemail);
// Mail.checkMail(thid) supplies the signature Vector that check() consumes.
50 Vector signatures = email.checkMail(thid);
51 //check with global data structure
52 int[] confidenceVals=null;
54 confidenceVals = check(signatures,thid);
57 //---- create and return results --------
58 FilterResult filterResult = new FilterResult();
59 boolean filterAnswer = filterResult.getResult(confidenceVals);
// Feedback is sent only when the filter's verdict differs from the label
// carried by the email.
61 boolean userAnswer = email.getIsSpam();
62 if(filterAnswer != userAnswer) {
64 sendFeedBack(email, userAnswer, thid);
// Entry point: parses the command line, labels the emails, builds the shared
// map and one SpamFilter per thread, then (per the section comments below)
// starts and joins the threads before printing "Finished".
71 public static void main(String[] args) {
72 int[] mid = new int[8];
// Each entry packs the four octets 128.195.136.x into one int, high octet first.
// mid is not read in the lines shown; presumably it is used where the threads
// are started -- TODO confirm.
73 mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc-1.calit2
74 mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc-2.calit2
75 mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc-3.calit2
76 mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-4.calit2
77 mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc-5.calit2
78 mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc-6.calit2
79 mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dc-7.calit2
80 mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dc-8.calit2
82 //Read options from command prompt
// sf is used here only as a holder for the parsed numiter/numemail/nthreads.
83 SpamFilter sf = new SpamFilter();
84 SpamFilter.parseCmdLine(args, sf);
85 int nthreads = sf.nthreads;
// Constant seed 8: the labels assigned below are the same on every run.
87 Random rand = new Random(8);
88 //Randomly set Spam vals for each email
89 for(int i=0; i<sf.numemail; i++) {
90 Mail email = new Mail("./emails/email"+i);
91 int spamval = rand.nextInt(100);
// NOTE(review): the trailing comment says 60% are spam, yet values below 60
// are passed to setIsSpam(false) -- confirm which of the two is intended.
92 if(spamval<60) { //assume 60% are spam and rest are ham
93 email.setIsSpam(false);
95 email.setIsSpam(true);
// NOTE(review): each Mail built in this loop is local to one iteration in the
// lines shown; confirm how the label reaches the Mail objects that the
// workers construct later.
99 //Create Global data structure
100 DistributedHashMap dhmap;
// A single map is created and handed to every worker below. The arguments
// 500 and 0.75f are presumably initial capacity and load factor -- TODO confirm.
// spf is declared in a line not shown.
103 dhmap = global new DistributedHashMap(500, 0.75f);
104 spf = global new SpamFilter[nthreads];
105 for(int i=0; i<nthreads; i++) {
106 spf[i] = global new SpamFilter(sf.numiter, sf.numemail, i, dhmap, nthreads);
110 /* ---- Start Threads ---- */
112 for(int i = 0; i<nthreads; i++) {
119 /* ---- Join threads----- */
120 for(int i = 0; i<nthreads; i++) {
127 System.out.println("Finished");
// Reads leading "-" options from args into sf:
//   -n <int> -> sf.numiter, -e <int> -> sf.numemail, -t <int> -> sf.nthreads.
// Parsing stops at the first argument that does not start with "-".
// i and arg are declared/assigned in lines not shown; presumably arg is the
// flag just consumed from args -- TODO confirm.
130 public static void parseCmdLine(String args[], SpamFilter sf) {
133 while (i < args.length && args[i].startsWith("-")) {
136 if(arg.equals("-n")) { //num of iterations
// The bounds check guards the read of the option's value; args[i++] takes the
// value and advances past it. No else branch is visible for a missing value.
137 if(i < args.length) {
138 sf.numiter = new Integer(args[i++]).intValue();
140 } else if(arg.equals("-e")) { //num of emails
141 if(i < args.length) {
142 sf.numemail = new Integer(args[i++]).intValue();
144 } else if(arg.equals("-t")) { //num of threads
145 if(i < args.length) {
146 sf.nthreads = new Integer(args[i++]).intValue();
// The body of the "-h" branch is not among the lines shown.
148 } else if(arg.equals("-h")) {
// A thread count of 0 is treated specially; the handling itself is in lines
// not shown.
152 if(sf.nthreads == 0) {
158 * The usage routine describing the program
// Prints the command-line synopsis and a one-line description of the -n, -e
// and -t options to standard output.
// NOTE(review): parseCmdLine also recognises "-h", which is not listed here.
160 public void usage() {
161 System.out.println("usage: ./spamfilter -n <num iterations> -e <num emails> -t <num threads>\n");
162 System.out.println( " -n : num iterations");
163 System.out.println( " -e : number of emails");
164 System.out.println( " -t : number of threads");
168 * Returns result to the Spam filter
// Classifies one mail: collects its string parts, computes their signatures,
// looks the signatures up via check(), and lets FilterResult reduce the
// confidence values to a boolean.
//   mail   - the message to classify
//   userid - passed through unchanged to check()
// The return statement is not among the lines shown; presumably it returns
// the local `spam` -- TODO confirm.
171 public boolean checkMail(Mail mail, int userid) {
173 //Vector partsOfMailStrings = mail.createMailStringsWithURL();
// NOTE(review): the body string is appended to the Vector returned by
// getCommonPart(); if that Vector is owned by the Mail, repeated calls would
// keep growing it -- confirm that getCommonPart() returns a fresh Vector.
175 Vector partsOfMailStrings = mail.getCommonPart();
176 partsOfMailStrings.addElement(mail.getBodyString());
179 SignatureComputer sigComp = new SignatureComputer();
180 Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings
182 //check with global data structure
183 int[] confidenceVals = check(signatures,userid);
185 //---- create and return results --------
186 FilterResult filterResult = new FilterResult();
187 boolean spam = filterResult.getResult(confidenceVals);
// Looks up every signature in mydhmap and returns one confidence value per
// signature, in the same order as the input Vector.
//   signatures - Vector of Strings; character 0 is taken as the engine and
//                everything from index 2 on as the signature
//   userid     - recorded in the stats of entries created here
// Returns an int[] of length signatures.size() filled from
// FilterStatistic.getChecked().
193 public int[] check(Vector signatures, int userid) {
194 int numparts = signatures.size();
195 int[] confidenceVals = new int[numparts];
196 for(int i=0; i<numparts; i++) {
197 String part = (String)(signatures.elementAt(i));
198 char tmpengine = part.charAt(0);
// `global new` is not standard Java; presumably the allocation form of the
// distributed runtime this file targets -- TODO confirm.
199 String engine = global new String(tmpengine);
200 String signature = global new String(part.substring(2));
201 //String signature = part.substring(2); //a:b index(a)=0, index(:)=1, index(b)=2
// Build a probe key from the engine/signature pair.
202 HashEntry myhe = global new HashEntry();
203 myhe.setengine(engine);
204 myhe.setsig(signature);
206 //find object in distributedhashMap: if no object then add object
// NOTE(review): this is a check-then-put on a map that main shares between
// all workers; confirm the surrounding (not shown) lines make it atomic.
208 if(!mydhmap.containsKey(myhe)) {
// First sighting of this key: attach fresh stats and store an initial
// FilterStatistic(0,0,-1) as its value.
210 myhe.stats = global new HashStat();
211 myhe.stats.setuser(userid, 0, 0, -1);
212 FilterStatistic fs = global new FilterStatistic(0,0,-1);
213 mydhmap.put(myhe, fs);
215 // ----- now connect to global data structure and ask for spam -----
// tmphe is not read in the lines shown here -- NOTE(review): confirm it is
// used in the omitted lines, otherwise the getKey() call is redundant.
216 HashEntry tmphe = (HashEntry)(mydhmap.getKey(myhe));
217 FilterStatistic fs = (FilterStatistic) (mydhmap.get(myhe)); //get the value from hash
218 confidenceVals[i] = fs.getChecked();
222 // --> the mail client is able to determine if it is spam or not
223 return confidenceVals;
// Records the user's verdict for one mail: recomputes the mail's signatures
// and, for each one, updates the per-user counters on the matching map key.
//   mail   - the message the feedback is about
//   isSpam - the user's verdict; presumably selects between the spam and ham
//            increments below (the condition is not among the lines shown)
//   id     - index into stats.userid and argument to the counter updates
226 public void sendFeedBack(Mail mail, boolean isSpam, int id) {
227 Vector partsOfMailStrings = mail.getCommonPart();
228 partsOfMailStrings.addElement(mail.getBodyString());
230 SignatureComputer sigComp = new SignatureComputer();
231 Vector signatures = sigComp.computeSigs(partsOfMailStrings);//vector of strings
233 for(int i=0;i<signatures.size();i++) {
234 String part = (String)(signatures.elementAt(i));
235 char tmpengine = part.charAt(0);
236 String engine = global new String(tmpengine);
237 String signature = global new String(part.substring(2));
238 //String signature = part.substring(2); //a:b index(a)=0, index(:)=1, index(b)=2
// Build a probe key from the engine/signature pair.
239 HashEntry myhe = global new HashEntry();
240 myhe.setengine(engine);
241 myhe.setsig(signature);
243 // ----- now connect to global data structure and update spam count -----
// NOTE(review): tmphe is dereferenced without a null check. The worker loop
// calls check() first, which inserts missing keys, but it obtains its
// signatures from Mail.checkMail() rather than from computeSigs() as done
// here -- confirm both paths always yield the same keys.
244 HashEntry tmphe = (HashEntry)(mydhmap.getKey(myhe));
// Mark this user on the entry the first time they give feedback on it.
245 if(tmphe.stats.userid[id] != 1) {
246 tmphe.stats.setuserid(id);
// fs is not read in the lines shown here; presumably used in omitted lines
// -- TODO confirm.
249 FilterStatistic fs = (FilterStatistic) (mydhmap.get(myhe)); //get the value from hash
251 //Increment spam or ham value
253 tmphe.stats.incSpamCount(id);
256 tmphe.stats.incHamCount(id);