#!/usr/bin/python
#
# parse_packet_frequency.py
#
# Provenance: added to pingpong.git by rtrimana on Fri, 3 Nov 2017 in commit
# bceb30b9aba7fa2d4df53e8196ee981f27d3cba4
# ("A simple script to analyze the distribution of packet frequencies
# against time").

"""
Script that takes a file (output by wireshark/tshark, in JSON format) and analyzes
the traffic frequency of a certain device at a certain time.

Usage:
    python parse_packet_frequency.py <json_file> <output_file> <table_header> <mac_address>
"""

import sys
import json
from collections import defaultdict
from dateutil import parser

JSON_KEY_SOURCE = "_source"
JSON_KEY_LAYERS = "layers"

JSON_KEY_ETH = "eth"
JSON_KEY_ETH_DST = "eth.dst"
JSON_KEY_ETH_SRC = "eth.src"
JSON_KEY_FRAME = "frame"
JSON_KEY_FRAME_TIME = "frame.time"


def save_to_file(tbl_header, dictionary, filename_out):
    """ Append a table (header line plus sorted "key, value" rows) to a file
        Args:
            tbl_header: header for the saved table
            dictionary: dictionary to be saved
            filename_out: file name to save
    """
    # Appending, not overwriting! Repeated runs accumulate tables in one file.
    # The 'with' block closes the file even if one of the writes fails.
    with open(filename_out, 'a') as f:
        # Write the table header, separated from earlier content by blank lines
        f.write("\n\n" + str(tbl_header) + "\n")
        # Write (key, value) pairs in sorted key order so output is stable
        for key in sorted(dictionary):
            f.write(str(key) + ", " + str(dictionary[key]) + "\n")

    # Single-argument print() behaves the same on Python 2 and Python 3
    print("Writing output to file:  " + str(filename_out))


def main():
    """ Main function

        Reads four command line arguments: the tshark JSON file to parse, the
        output file to append to, the header for the saved table, and the MAC
        address of the device to analyze.
    """
    if len(sys.argv) < 5:
        print("Usage: python " + sys.argv[0] +
              " <json_file> <output_file> <table_header> <mac_address>")
        return
    # Parse the file for the specified MAC address
    time_freq = parse_json(sys.argv[1], sys.argv[4])
    # Write statistics into file
    save_to_file(sys.argv[3], time_freq, sys.argv[2])
    print("=====================================================================")
    # Sorted so the console summary matches the order of the saved table
    for time in sorted(time_freq):
        print(str(time) + "  =>  " + str(time_freq[time]))
    print("=====================================================================")


def parse_json(file_path, mac_address):
    """ Count, per second, the packets sent or received by a MAC address
        Args:
            file_path: path of the read file
            mac_address: MAC address of a device to analyze
        Returns:
            dictionary that maps a time string ("HH:MM:SS") to the number of
            packets whose Ethernet source or destination equals mac_address.
            Only the time of day is kept, so packets captured on different
            dates at the same clock time share one entry.
    """
    # Maps timestamps to frequencies of packets
    time_freq = defaultdict(int)
    with open(file_path) as jf:
        # Read JSON.
        # data becomes reference to root JSON object (or in our case json array)
        data = json.load(jf)
    # Loop through json objects in data
    # Each entry is a pcap entry (packet and associated metadata)
    for p in data:
        # p is a JSON object, not an index
        layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS]
        # Get timestamp; either the frame layer or its time field may be absent
        frame = layers.get(JSON_KEY_FRAME, None)
        date_time = None
        if frame is not None:
            date_time = frame.get(JSON_KEY_FRAME_TIME, None)
        # Get into the Ethernet address part
        eth = layers.get(JSON_KEY_ETH, None)
        # Skip any packet without an Ethernet layer
        if eth is None:
            print("[ WARNING: Packet has no ethernet address! ]")
            continue
        # Skip packets without a timestamp instead of crashing on them
        if date_time is None:
            print("[ WARNING: Packet has no timestamp! ]")
            continue
        # Get source and destination MAC addresses
        src = eth.get(JSON_KEY_ETH_SRC, None)
        dst = eth.get(JSON_KEY_ETH_DST, None)
        # Get just the time part, without the sub-second part
        time_str = parser.parse(date_time).time().strftime("%H:%M:%S")
        print(str(time_str) + " - src:" + str(src) + " - dest:" + str(dst))
        # Get and count the traffic for the specified MAC address
        if src == mac_address or dst == mac_address:
            time_freq[time_str] += 1
    # Hand back a plain dict so lookups by callers never create new keys
    return dict(time_freq)


if __name__ == '__main__':
    main()