import sys
import json
+import numpy as np
from collections import defaultdict
from dateutil import parser
+from datetime import datetime
+from decimal import *
JSON_KEY_SOURCE = "_source"
JSON_KEY_LAYERS = "layers"
JSON_KEY_ETH_SRC = "eth.src"
JSON_KEY_FRAME = "frame"
JSON_KEY_FRAME_TIME = "frame.time"
+JSON_KEY_FRAME_LEN = "frame.len"
TABLE_HEADER_X = "Timestamp (hh:mm:ss)"
-TABLE_HEADER_Y = "Packet frequency (pps)"
+TABLE_HEADER_Y = "Packet frequency"
+INCOMING_APPENDIX = "_incoming"
+OUTGOING_APPENDIX = "_outgoing"
+FILE_APPENDIX = ".dat"
+# Use this constant as a flag
+WINDOW_SIZE = 5
+USE_MOVING_AVERAGE = False
+USE_BINNING = True
+# Range = 6, i.e. 3 to left and 3 to right (in seconds)
+#TOTAL_RANGE = 60 # TOTAL_RANGE = 2 x RANGE
+#RANGE = 30
+TOTAL_RANGE = 20 # TOTAL_RANGE = 2 x RANGE
+RANGE = 10
-def save_to_file(tbl_header, dictionary, filename_out):
+def moving_average(array, window=3):
+ """ Calculate moving average
+ Args:
+ array: array of numbers
+ window: window of moving average (default = 3)
+ Adapted from:
+ https://stackoverflow.com/questions/14313510/how-to-calculate-moving-average-using-numpy
+ """
+ # Check if window > len(array)
+ if window > len(array):
+ window = len(array)
+ # Calculate cumulative sum of each array element
+ retarr = np.cumsum(array, dtype=float)
+ # Adjust cumulative sum of each array element
+ # based on window size
+ retarr[window:] = retarr[window:] - retarr[:-window]
+ # Pad the first array elements with zeroes
+ retarr[:window - 1] = np.zeros(window - 1)
+ # Calculate moving average starting from the element
+ # at window size, e.g. element 4 for window=5
+ retarr[window - 1:] = retarr[window - 1:] / window
+ return retarr
+
+def hms_to_seconds(t):
+ """ Calculate hms to seconds
+ Args:
+ t = time in hh:mm:ss string
+ Adapted from:
+ https://stackoverflow.com/questions/10742296/python-time-conversion-hms-to-seconds
+ """
+ h, m, s = [int(i) for i in t.split(':')]
+ return 3600*h + 60*m + s
+
+def seconds_to_hms(t):
+ """ Calculate seconds to hms
+ Args:
+ t = time in seconds
+ Adapted from:
+ https://stackoverflow.com/questions/10742296/python-time-conversion-hms-to-seconds
+ """
+ h = t / 3600
+ m = (t - (h * 3600)) / 60
+ s = t - (h * 3600) - (m * 60)
+ hh = str(h)
+ if len(hh) is 1:
+ hh = "0" + hh
+ mm = str(m)
+ if len(mm) is 1:
+ mm = "0" + mm
+ ss = str(s)
+ if len(ss) is 1:
+ ss = "0" + ss
+ return hh + ":" + mm + ":" + ss
+
+def include_timestamps_zero_packets(timelen):
+ """ Include every second that has zero packets (no packets/transmission)
+ Args:
+ timelen = dictionary that maps timestamps to packet length
+ """
+ sortedkeylist = []
+ for key in sorted(timelen):
+ sortedkeylist.append(key)
+ first = sortedkeylist[0]
+ last = sortedkeylist[len(sortedkeylist)-1]
+ # Calculate the number of seconds between first and last packets
+ first_seconds = hms_to_seconds(first)
+ last_seconds = hms_to_seconds(last)
+ seconds = last_seconds - first_seconds
+ # Start counting and filling in timestamps with zero packets
+ counter = 0
+ while counter < seconds:
+ timestamp = seconds_to_hms(first_seconds + counter)
+ if timestamp not in timelen:
+ timelen[timestamp] = 0
+ counter += 1
+ return timelen
+
+
+def save_to_file(tblheader, dictionary, filenameout):
""" Show summary of statistics of PCAP file
Args:
- tbl_header: header for the saved table
+ tblheader: header for the saved table
dictionary: dictionary to be saved
filename_out: file name to save
"""
# Appending, not overwriting!
- f = open(filename_out, 'a')
+ f = open(filenameout, 'a')
# Write the table header
- f.write("# " + TABLE_HEADER_X + " " + TABLE_HEADER_Y + "\n");
- # Iterate over dictionary and write (key, value) pairs
- for key in sorted(dictionary):
- # Comma separated
- #f.write(str(key) + ", " + str(dictionary[key]) + "\n")
- # Space separated
- f.write(str(key) + " " + str(dictionary[key]) + "\n")
+ f.write("# " + tblheader + "\n")
+ f.write("# " + TABLE_HEADER_X + " " + TABLE_HEADER_Y + "\n")
+ # Write "0 0" if dictionary is empty
+ if not dictionary:
+ f.write("0 0")
+ f.close()
+ print "Writing zeroes to file: ", filenameout
+ return
+
+ if USE_MOVING_AVERAGE:
+ # Use moving average if this flag is true
+ sortedarr = []
+ for key in sorted(dictionary):
+ sortedarr.append(dictionary[key])
+ valarr = moving_average(sortedarr, WINDOW_SIZE)
+ #print vallist
+ # Iterate over dictionary and write (key, value) pairs
+ ind = 0
+ for key in sorted(dictionary):
+ # Space separated
+ f.write(str(key) + " " + str(valarr[ind]) + "\n")
+ ind += 1
+ elif USE_BINNING:
+ sortedlist = []
+ # Iterate over dictionary and write (key, value) pairs
+ ind = 0
+ first = 0
+ last = 0
+ for key in sorted(dictionary):
+ sortedlist.append(key)
+ print "Key: ", key, " - Value: ", dictionary[key], " - Ind: ", ind
+ ind += 1
+ first = hms_to_seconds(sortedlist[0])
+ #print "First: ", key
+ last = hms_to_seconds(sortedlist[ind-1])
+ #print "Last: ", key
+ resultdict = dict()
+ # Put new binning keys
+ time_ind = first
+ ind = 0
+ while time_ind < last:
+ # Initialize with the first key in the list
+ curr_key = sortedlist[ind]
+ curr_key_secs = hms_to_seconds(curr_key)
+ # Initialize with 0 first
+ resultdict[time_ind] = 0
+ # Check if this is still within RANGE - bin the value if it is
+ while time_ind - RANGE <= curr_key_secs and curr_key_secs <= time_ind + RANGE:
+ resultdict[time_ind] += dictionary[curr_key]
+ print "Time index: ", seconds_to_hms(time_ind), " Value: ", resultdict[time_ind]
+ ind += 1
+ if ind > len(dictionary)-1:
+ break
+ # Initialize with the key in the list
+ curr_key = sortedlist[ind]
+ curr_key_secs = hms_to_seconds(curr_key)
+ # Increment time index
+ time_ind += TOTAL_RANGE
+ # Now write to file after binning
+ for key in sorted(resultdict):
+ # Space separated
+ f.write(seconds_to_hms(key) + " " + str(resultdict[key]) + "\n")
+ #print seconds_to_hms(key) + " " + str(resultdict[key])
+
+ else:
+ # Iterate over dictionary and write (key, value) pairs
+ for key in sorted(dictionary):
+ # Space separated
+ f.write(str(key) + " " + str(dictionary[key]) + "\n")
f.close()
- print "Writing output to file: ", filename_out
+ print "Writing output to file: ", filenameout
def main():
print "Usage: python", sys.argv[0], "<input_file> <output_file> <device_name> <mac_address>"
return
# Parse the file for the specified MAC address
- time_freq = parse_json(sys.argv[1], sys.argv[4])
- # Write statistics into file
- save_to_file(sys.argv[3], time_freq, sys.argv[2])
print "====================================================================="
- #for time in time_freq.keys():
- for key in sorted(time_freq):
- print key, " => ", time_freq[key]
+ print "Analyzing file: ", sys.argv[1]
+ timelen_incoming = parse_json(sys.argv[1], sys.argv[4], True)
+ if len(timelen_incoming) > 0:
+ timelen_incoming = include_timestamps_zero_packets(timelen_incoming)
+ print "==> Printing incoming traffic ..."
+ save_to_file(sys.argv[3] + INCOMING_APPENDIX, timelen_incoming, sys.argv[2] + INCOMING_APPENDIX + FILE_APPENDIX)
+ else:
+ print "No incoming traffic to this MAC address!"
print "====================================================================="
+ timelen_outgoing = parse_json(sys.argv[1], sys.argv[4], False)
+ if len(timelen_outgoing) > 0:
+ timelen_outgoing = include_timestamps_zero_packets(timelen_outgoing)
+ print "==> Printing outgoing traffic ..."
+ save_to_file(sys.argv[3] + OUTGOING_APPENDIX, timelen_outgoing, sys.argv[2] + OUTGOING_APPENDIX + FILE_APPENDIX)
+ else:
+ print "No outgoing traffic from this MAC address!"
+ print "====================================================================="
+ #for time in time_freq.keys():
+ #for key in sorted(time_freq):
+ # print key, " => ", time_freq[key]
+ #print "====================================================================="
# Convert JSON file containing DNS traffic to a map in which a hostname points to its set of associated IPs.
-def parse_json(file_path, mac_address):
+def parse_json(filepath, macaddress, incomingoutgoing):
""" Show summary of statistics of PCAP file
Args:
- file_path: path of the read file
- mac_address: MAC address of a device to analyze
+ filepath: path of the read file
+ macaddress: MAC address of a device to analyze
+ incomingoutgoing: boolean to define whether we collect incoming or outgoing traffic
+ True = incoming, False = outgoing
"""
- # Maps timestamps to frequencies of packets
- time_freq = dict()
- with open(file_path) as jf:
+ # Maps timestamps to lengths of packets
+ timelen = dict()
+ with open(filepath) as jf:
# Read JSON.
# data becomes reference to root JSON object (or in our case json array)
data = json.load(jf)
layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS]
# Get timestamp
frame = layers.get(JSON_KEY_FRAME, None)
- date_time = frame.get(JSON_KEY_FRAME_TIME, None)
+ datetime = frame.get(JSON_KEY_FRAME_TIME, None)
+ # Get frame length
+ length = frame.get(JSON_KEY_FRAME_LEN, None)
# Get into the Ethernet address part
eth = layers.get(JSON_KEY_ETH, None)
# Skip any non DNS traffic
src = eth.get(JSON_KEY_ETH_SRC, None)
dst = eth.get(JSON_KEY_ETH_DST, None)
# Get just the time part
- date_time_obj = parser.parse(date_time)
+ datetimeobj = parser.parse(datetime)
# Remove the microsecond part
- time_str = str(date_time_obj.time())[:8]
- print str(time_str) + " - src:" + str(src) + " - dest:" + str(dst)
+ timestr = str(datetimeobj.time())[:8]
+ #print str(timestr) + " - src:" + str(src) + " - dest:" + str(dst) + " - length: ", length
# Get and count the traffic for the specified MAC address
- if src == mac_address or dst == mac_address:
- # Check if timestamp already exists in the map
- # If yes, then just increment the frequency value...
- if time_str in time_freq:
- time_freq[time_str] = time_freq[time_str] + 1
- else: # If not, then put the value one there
- time_freq[time_str] = 1
- return time_freq
+ if incomingoutgoing:
+ if dst == macaddress:
+ # Check if timestamp already exists in the map
+ # If yes, then just increment the frequency value...
+ if timestr in timelen:
+ timelen[timestr] = timelen[timestr] + int(length)
+ else: # If not, then put the value one there
+ timelen[timestr] = int(length)
+ else:
+ if src == macaddress:
+ # Check if timestamp already exists in the map
+ # If yes, then just increment the frequency value...
+ if timestr in timelen:
+ timelen[timestr] = timelen[timestr] + int(length)
+ else: # If not, then put the value one there
+ timelen[timestr] = int(length)
+
+ return timelen
+
if __name__ == '__main__':
main()