"""
Script that takes a file (output by wireshark/tshark, in JSON format) and analyzes
the traffic frequency of a certain device at a certain time.
"""
11 from collections import defaultdict
12 from dateutil import parser
13 from datetime import datetime
# Keys used to walk the per-packet objects of the tshark JSON export.
JSON_KEY_SOURCE = "_source"
JSON_KEY_LAYERS = "layers"

# NOTE(review): assumed to be the parent layer of "eth.dst"/"eth.src" -- confirm
# against an actual capture.
JSON_KEY_ETH = "eth"
JSON_KEY_ETH_DST = "eth.dst"
JSON_KEY_ETH_SRC = "eth.src"
JSON_KEY_FRAME = "frame"
JSON_KEY_FRAME_TIME = "frame.time"

# Column titles written into the header of every output table.
TABLE_HEADER_X = "Timestamp (hh:mm:ss)"
TABLE_HEADER_Y = "Packet frequency (pps)"

# Suffixes appended to the device name and to the output file name.
INCOMING_APPENDIX = "_incoming"
OUTGOING_APPENDIX = "_outgoing"
FILE_APPENDIX = ".dat"

# Use this constant as a flag
USE_MOVING_AVERAGE = False
# Window handed to moving_average() when USE_MOVING_AVERAGE is set.
# NOTE(review): value assumed to match moving_average()'s default -- confirm.
WINDOW_SIZE = 3

# Binning: time indexes advance by TOTAL_RANGE seconds and each bin collects the
# timestamps within RANGE seconds to the left and to the right of its index.
TOTAL_RANGE = 60  # TOTAL_RANGE = 2 x RANGE
RANGE = TOTAL_RANGE // 2
def moving_average(array, window=3):
    """Calculate the trailing moving average of a sequence of numbers.

    Element ``i`` of the result (for ``i >= window - 1``) is the mean of
    ``array[i - window + 1 : i + 1]``; the first ``window - 1`` elements,
    which have no full window behind them, are set to zero.

    Args:
        array: array of numbers
        window: window of moving average (default = 3). A window larger than
            the input is clamped to the input length.

    Returns:
        A float numpy array with the same length as ``array``.

    Raises:
        ValueError: if ``window`` is smaller than 1.

    Adapted from:
        https://stackoverflow.com/questions/14313510/how-to-calculate-moving-average-using-numpy
    """
    if window < 1:
        raise ValueError("window must be at least 1")
    size = len(array)
    # Nothing to average; also avoids negative slice sizes below.
    if size == 0:
        return np.zeros(0, dtype=float)
    # A window wider than the data would make the padding slice overflow.
    if window > size:
        window = size
    # Calculate cumulative sum of each array element
    retarr = np.cumsum(array, dtype=float)
    # Turn the running totals into sums over the last `window` elements
    retarr[window:] = retarr[window:] - retarr[:-window]
    # Pad the first array elements with zeroes
    retarr[:window - 1] = 0.0
    # Calculate moving average starting from the element
    # at window size, e.g. element 4 for window=5
    retarr[window - 1:] /= window
    return retarr
def hms_to_seconds(t):
    """Convert an "hh:mm:ss" string to a number of seconds.

    Args:
        t: time in hh:mm:ss string; every field must parse as an integer.

    Returns:
        The total number of seconds as an int.

    Raises:
        ValueError: if ``t`` does not consist of exactly three
            ':'-separated integer fields.

    Adapted from:
        https://stackoverflow.com/questions/10742296/python-time-conversion-hms-to-seconds
    """
    hours, minutes, seconds = (int(field) for field in t.split(':'))
    return 3600 * hours + 60 * minutes + seconds
def seconds_to_hms(t):
    """Convert a number of seconds to an "hh:mm:ss" string.

    Each field is zero-padded to at least two digits, so the result can be
    parsed back by splitting on ':'.

    Args:
        t: time in seconds (number).

    Returns:
        The time formatted as "hh:mm:ss".

    Adapted from:
        https://stackoverflow.com/questions/10742296/python-time-conversion-hms-to-seconds
    """
    # divmod floors on every Python version, unlike "/" which only truncates
    # on Python 2.
    hours, remainder = divmod(t, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "%02d:%02d:%02d" % (hours, minutes, seconds)
# save_to_file: append a two-column table built from `dictionary` to the file `filenameout`.
# The file is opened in append mode and two '#'-prefixed header lines are written first:
# the caller-supplied table header, then the X/Y column titles.
# Three ways of emitting rows are visible below: values smoothed by moving_average()
# (guarded by USE_MOVING_AVERAGE), values summed into time bins that advance by TOTAL_RANGE
# seconds, and a plain dump of the sorted (key, value) pairs.
# NOTE(review): the existing summary line ("Show summary of statistics of PCAP file") does not
# describe what the visible code does; it looks copied from another function.
# NOTE(review): this listing has lost its indentation and several statements are not visible
# (the empty-dictionary test, the initialisation of sortedarr / sortedlist / resultdict / ind /
# time_ind, the condition that selects the binning branch, and any f.close()). Confirm them
# against the complete file before changing behavior.
93 def save_to_file(tblheader, dictionary, filenameout):
94 """ Show summary of statistics of PCAP file
96 tblheader: header for the saved table
97 dictionary: dictionary to be saved
98 filename_out: file name to save
100 # Appending, not overwriting!
101 f = open(filenameout, 'a')
102 # Write the table header
103 f.write("# " + tblheader + "\n")
104 f.write("# " + TABLE_HEADER_X + " " + TABLE_HEADER_Y + "\n")
105 # Write "0 0" if dictionary is empty
109 print "Writing zeroes to file: ", filenameout
# Moving-average output: collect the values in key order, smooth them, then write one
# "key value" row per key using the smoothed value at the same position.
112 if USE_MOVING_AVERAGE:
113 # Use moving average if this flag is true
115 for key in sorted(dictionary):
116 sortedarr.append(dictionary[key])
117 valarr = moving_average(sortedarr, WINDOW_SIZE)
119 # Iterate over dictionary and write (key, value) pairs
121 for key in sorted(dictionary):
123 f.write(str(key) + " " + str(valarr[ind]) + "\n")
# Binning output: keys are converted with hms_to_seconds(), so they are presumably
# "hh:mm:ss" strings -- verify against the caller.
127 # Iterate over dictionary and write (key, value) pairs
131 for key in sorted(dictionary):
132 sortedlist.append(key)
133 print "Key: ", key, " - Value: ", dictionary[key], " - Ind: ", ind
# first / last are the smallest and largest keys expressed in seconds.
135 first = hms_to_seconds(sortedlist[0])
136 #print "First: ", key
137 last = hms_to_seconds(sortedlist[ind-1])
140 # Put new binning keys
# NOTE(review): the loop only runs while time_ind < last; check whether keys later than the
# final bin's upper edge (time_ind + RANGE) can be left out of the output.
143 while time_ind < last:
144 # Initialize with the first key in the list
145 curr_key = sortedlist[ind]
146 curr_key_secs = hms_to_seconds(curr_key)
147 # Initialize with 0 first
148 resultdict[time_ind] = 0
149 # Check if this is still within RANGE - bin the value if it is
150 while time_ind - RANGE <= curr_key_secs and curr_key_secs <= time_ind + RANGE:
151 resultdict[time_ind] += dictionary[curr_key]
152 print "Time index: ", seconds_to_hms(time_ind), " Value: ", resultdict[time_ind]
# Guard against running past the last key of the sorted list.
154 if ind > len(dictionary)-1:
156 # Initialize with the key in the list
157 curr_key = sortedlist[ind]
158 curr_key_secs = hms_to_seconds(curr_key)
159 # Increment time index
160 time_ind += TOTAL_RANGE
161 # Now write to file after binning
# Bin keys are seconds; they are converted back to "hh:mm:ss" text for the table.
162 for key in sorted(resultdict):
164 f.write(seconds_to_hms(key) + " " + str(resultdict[key]) + "\n")
165 #print seconds_to_hms(key) + " " + str(resultdict[key])
# Plain output: one "key value" row per key, in sorted key order.
167 # Iterate over dictionary and write (key, value) pairs
168 for key in sorted(dictionary):
170 f.write(str(key) + " " + str(dictionary[key]) + "\n")
172 print "Writing output to file: ", filenameout
# Script entry logic. Expects four command-line arguments:
# <input_file> <output_file> <device_name> <mac_address>.
# It counts the incoming packets for the MAC address with parse_json() and hands the result to
# save_to_file(), using "<device_name>_incoming" as the table header and
# "<output_file>_incoming.dat" as the output file name. The outgoing-traffic pass is
# commented out.
# NOTE(review): the enclosing `def` line and whatever follows the usage message (presumably an
# early return or exit) are not visible in this listing -- confirm against the complete file.
178 if len(sys.argv) < 5:
179 print "Usage: python", sys.argv[0], "<input_file> <output_file> <device_name> <mac_address>"
181 # Parse the file for the specified MAC address
182 timefreq_incoming = parse_json(sys.argv[1], sys.argv[4], True)
183 #timefreq_outgoing = parse_json(sys.argv[1], sys.argv[4], False)
184 # Write statistics into file
185 print "====================================================================="
186 print "==> Analyzing incoming traffic ..."
187 save_to_file(sys.argv[3] + INCOMING_APPENDIX, timefreq_incoming, sys.argv[2] + INCOMING_APPENDIX + FILE_APPENDIX)
188 print "====================================================================="
189 #print "==> Analyzing outgoing traffic ..."
190 #save_to_file(sys.argv[3] + OUTGOING_APPENDIX, timefreq_outgoing, sys.argv[2] + OUTGOING_APPENDIX + FILE_APPENDIX)
191 #print "====================================================================="
192 #for time in time_freq.keys():
193 #for key in sorted(time_freq):
194 # print key, " => ", time_freq[key]
195 #print "====================================================================="
# Convert a tshark JSON capture into a map from timestamp to packet count
# for a single device.
def parse_json(filepath, macaddress, incomingoutgoing):
    """Count packets per second to or from one MAC address.

    Args:
        filepath: path of the read file; it is loaded as JSON and iterated
            as a sequence of packet objects, each of which must provide
            "_source" -> "layers".
        macaddress: MAC address of a device to analyze
        incomingoutgoing: boolean to define whether we collect incoming or
            outgoing traffic. True = incoming (destination address equals
            ``macaddress``), False = outgoing (source address equals
            ``macaddress``).

    Returns:
        A dict that maps "hh:mm:ss" timestamp strings to the number of
        matching packets carrying that timestamp.
    """
    # Local import keeps this function independent of the module's import block.
    import json

    # The Ethernet field to compare is fixed for the whole run.
    address_key = JSON_KEY_ETH_DST if incomingoutgoing else JSON_KEY_ETH_SRC
    # Maps timestamps to frequencies of packets
    timefreq = {}
    with open(filepath) as jf:
        # data becomes reference to root JSON object (or in our case json array)
        data = json.load(jf)
    # Each entry is a pcap entry (packet and associated metadata)
    for packet in data:
        layers = packet[JSON_KEY_SOURCE][JSON_KEY_LAYERS]
        frame = layers.get(JSON_KEY_FRAME)
        # A packet without a frame layer has no timestamp to count under.
        frametime = frame.get(JSON_KEY_FRAME_TIME) if frame else None
        # Get into the Ethernet address part
        eth = layers.get(JSON_KEY_ETH)
        # Skip any packet without an Ethernet layer
        if eth is None:
            print("[ WARNING: Packet has no ethernet address! ]")
            continue
        if frametime is None:
            print("[ WARNING: Packet has no frame time! ]")
            continue
        # Get source and destination MAC addresses
        src = eth.get(JSON_KEY_ETH_SRC)
        dst = eth.get(JSON_KEY_ETH_DST)
        # Keep just the time part, without microseconds ("hh:mm:ss")
        timestr = str(parser.parse(frametime).time())[:8]
        print(str(timestr) + " - src:" + str(src) + " - dest:" + str(dst))
        # Count the packet when the selected address matches the device
        if eth.get(address_key) == macaddress:
            timefreq[timestr] = timefreq.get(timestr, 0) + 1
    return timefreq
# Runs the guarded statement only when the file is executed as a script, not when imported.
# NOTE(review): the guarded statement (presumably the call to the entry-point function) is not
# visible in this listing -- confirm against the complete file.
256 if __name__ == '__main__':