3 Part of the Spamato project (www.spamato.net)
4 Copyright (C) 2005 ETHZ, DCG
5 contact by email: info@spamato.net
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 $Id: WhiplashSignature.java,v 1.3 2009/11/14 02:10:08 adash Exp $
/**
 * Derives "whiplash" signatures from the host names found in a piece of text
 * and renders hex strings in the base64 variant used by razor.
 */
public class WhiplashSignature {

    /** Lookup table mapping a 6-bit value (0..63) to its base64 character. */
    private final char[] b64table;

    /**
     * Builds the base64 lookup table: {@code A-Z} for 0..25, {@code a-z} for
     * 26..51 and {@code 0-9} for 52..61.
     */
    public WhiplashSignature() {
        b64table = new char[64];
        for (int i = 0; i <= 25; i++) {
            b64table[i] = (char) ('A' + i);
        }
        for (int i = 26; i <= 51; i++) {
            b64table[i] = (char) ('a' + (i - 26));
        }
        for (int i = 52; i <= 61; i++) {
            b64table[i] = (char) ('0' + (i - 52));
        }
        // NOTE(review): the reviewed excerpt did not show what entries 62 and 63
        // hold. '-' and '_' are assumed here (believed to be razor's alphabet);
        // confirm against the razor implementation before relying on it.
        b64table[62] = '-';
        b64table[63] = '_';
    }

    /**
     * Computes the whiplash signatures of {@code text} and converts each one
     * with {@link #hexToBase64(String)}.
     *
     * @param text the text to scan for hosts
     * @return one base64 signature per extracted host, or {@code null} when
     *         {@link #whiplash(String)} finds no host
     * @throws NumberFormatException if a raw signature is not a hex string
     */
    public String[] computeSignature(String text) {
        String[] sigs = whiplash(text);
        // TODO: Extract canonical domain name and convert to Base64
        if (sigs == null) {
            // whiplash() reports "no host found" as null; pass that on instead
            // of dereferencing it.
            return null;
        }
        for (int i = 0; i < sigs.length; i++) {
            sigs[i] = hexToBase64(sigs[i]);
        }
        return sigs;
    }

    /**
     * Converts a hex string into a base64 string exactly as it is done in razor.
     * The input is consumed in chunks of up to three hex digits; every chunk
     * yields two base64 characters.
     *
     * @param hex a hex value
     * @return a base64 equivalent of <code>hex</code>; empty for empty input
     * @throws NumberFormatException if {@code hex} contains a non-hex character
     */
    public String hexToBase64(String hex) {
        int chunkCount = (hex.length() + 2) / 3;
        StringBuilder encoded = new StringBuilder(chunkCount * 2);
        for (int start = 0; start < hex.length(); start += 3) {
            // The last chunk may hold fewer than three digits.
            int end = Math.min(start + 3, hex.length());
            int bv = convertHexToRazorEncoding(hex.substring(start, end));
            encoded.append(b64table[(bv >>> 6) & 0x3f]); // higher 6 bits
            encoded.append(b64table[bv & 0x3f]);         // lower 6 bits
        }
        return encoded.toString();
    }

    /**
     * Packs up to three hex digits into a 12-bit value. Razor does a special
     * conversion using perl's <code>pack()</code>, which must be done manually
     * in Java: each digit has its four bits mirrored, the first digit lands in
     * bits 8..11, the second in bits 4..7 and the third in bits 0..3. Missing
     * digits count as zero.
     *
     * @param hex3 one to three hex digits
     * @return the packed 12-bit value
     */
    private int convertHexToRazorEncoding(String hex3) {
        int res = 0;
        for (int pos = 0; pos < 3; pos++) {
            int cur = 0;
            if (pos < hex3.length()) {
                cur = Integer.parseInt(hex3.substring(pos, pos + 1), 16);
            }
            res |= (mirror4LSBits(cur) & 0xf) << (8 - 4 * pos);
        }
        return res;
    }

    /**
     * Mirrors the 4 least significant bits of an integer.
     *
     * @param cur an int whose 4 least significant bits are <code>abcd</code>
     * @return the mirrored bits <code>dcba</code>; all higher bits are dropped
     */
    public int mirror4LSBits(int cur) {
        int res = 0;
        res |= (cur & 0x8) >>> 3;
        res |= (cur & 0x4) >>> 1;
        res |= (cur & 0x2) << 1;
        res |= (cur & 0x1) << 3;
        return res;
    }

    /**
     * Produces one raw signature per host extracted from {@code text}.
     *
     * @param text the text to scan for hosts
     * @return one raw signature per host, or {@code null} if no host was found
     */
    public String[] whiplash(String text) {
        String[] hosts = extractHosts(text);
        if (hosts == null || hosts.length < 1) {
            return null;
        }

        String[] sigs = new String[hosts.length];
        for (int i = 0; i < hosts.length; i++) {
            // NOTE(review): the reviewed excerpt prepared the host's length and
            // bytes plus this 16-byte buffer, but the step that fills the buffer
            // was not visible. Until it is restored the buffer stays zeroed and
            // the result is NOT a hex string that hexToBase64() can parse.
            byte[] sig = new byte[16];
            sigs[i] = new String(sig);
        }
        return sigs;
    }

    /**
     * Extracts simplified host labels from {@code text}: the text between each
     * {@code "www."} and the next {@code '.'}, followed by the text between
     * each {@code '@'} and the next {@code '.'}.
     *
     * @param text the text to scan; may be {@code null}
     * @return the labels in discovery order (links first, then e-mail
     *         addresses), or {@code null} if none were found
     */
    public String[] extractHosts(String text) {
        if (text == null) {
            return null;
        }
        java.util.List<String> hosts = new java.util.ArrayList<String>();

        /* Extract hosts from http:// links */
        collectLabelsAfter(text, "www.", hosts);

        /* Extract hosts from email addresses */
        collectLabelsAfter(text, "@", hosts);

        if (hosts.isEmpty()) {
            return null;
        }
        return hosts.toArray(new String[hosts.size()]);
    }

    /**
     * Appends to {@code out} every label that sits between an occurrence of
     * {@code marker} and the following {@code '.'}; stops at a marker that has
     * no dot after it.
     */
    private static void collectLabelsAfter(String text, String marker, java.util.List<String> out) {
        int from = 0;
        while (true) {
            int markerAt = text.indexOf(marker, from);
            if (markerAt < 0) {
                return;
            }
            int labelStart = markerAt + marker.length();
            int dotAt = text.indexOf('.', labelStart);
            if (dotAt < 0) {
                // No terminating dot: nothing usable follows this marker.
                return;
            }
            out.add(text.substring(labelStart, dotAt));
            from = dotAt + 1;
        }
    }
}
211 // Testing the signature computation
212 // public static void main(String[] args) {
213 // /* String testVector = " Test Vectors: \n"+
215 // "1. http:www.nodg.com@www.geocities.com/nxcisdsfdfdsy/off\n"+
216 // "2. http:www.ksleybiuh.com@213.171.60.74/getoff/\n"+
217 // "3. <http:links.verotel.com/cgi-bin/showsite.verotel?vercode=12372:9804000000374206>\n"+
218 // "4. http:217.12.4.7/rmi/http:definethis.net/526/index.html\n"+
219 // "5. http:magalygr8sex.free-host.com/h.html\n"+
220 // "6. http:%3CVenkatrs%3E@218.80.74.102/thecard/4index.htm\n"+
221 // "7. http:EBCDVKIGURGGCEOKXHINOCANVQOIDOXJWTWGPC@218.80.74.102/thecard/5in\n"+
222 // "8. http:g.india2.bag.gs/remove_page.htm\n"+
223 // "9. https:220.97.40.149\n"+
224 // "10. http:mjaked.biz/unsubscribe.ddd?leaving\n"+
225 // "11. http:g5j99m8@it.rd.yahoo.com/bassi/*http:www.lekobas.com/c/index.php\n"+
226 // "12. <a href=\"http:Chettxuydyhv vwyyrcmgbxzj n as ecq kkurxtrvaug nfsygjjjwhfkpaklh t a qsc exinscfjtxr\n"+
227 // " jobg @www.mmv9.org?affil=19\">look great / feel great</a>\n"+
228 // "13. <A HREF=\"http:href=www.churchwomen.comhref=www.cairn.nethref=www.teeter.orghref=www.lefty.bizhref=wwwbehold.pitfall@www.mmstong5f.com/host/index.asp?ID=01910?href=www.corrode.comhref=www.ode.nethref=www.clergy.orghref=www.aberrate.biz\" >\n"+
229 // "14. www.pillzthatwork.com # anything that starts with www. \n";
231 // String testVector = "<html>\n"+
233 // "<p>Our first autolink: www.autolink1.com or another link like www.autolink2.co.uk or how about https:plaintextlink1.co.uk or http:plaintextlink2.com</p>\n"+
234 // "<p>now a masked link <a href=\"http://www.hiddenlink1.com\">http://www.coveringlink1.com</a> and another link http:plaintextlink3.net and how about https:plaintextlink4.to</p>\n"+
235 // "<p>another masked link <A Href=\"http://www.hiddenlink2.com\">https:coveringlink2.com</A> and another link https:plaintextlink5.com</p>\n"+
238 // String test1 = "Our first autolink: www.autolink1.com or another link like www.autolink2.co.uk or how about https:plaintextlink1.co.uk or http:plaintextlink2.com</p>\n";
239 // WhiplashSignature whiplash = new WhiplashSignature();
240 // String[] hosts = whiplash.computeSignature(testVector);
241 // //String[] hosts = whiplash.computeSignature(test1);
242 // for (int i = 0; i < hosts.length; i++) {
243 // String string = hosts[i];
244 // System.out.println("host " + i + ":\t" + string);