From: rtrimana Date: Mon, 6 Nov 2017 18:24:59 +0000 (-0800) Subject: Restructuring files and folders X-Git-Url: http://plrg.eecs.uci.edu/git/?p=pingpong.git;a=commitdiff_plain;h=4973373183489ff35d28453bcc231dfc90a6af70 Restructuring files and folders --- diff --git a/parse_dns.py b/parse_dns.py deleted file mode 100644 index 831b075..0000000 --- a/parse_dns.py +++ /dev/null @@ -1,206 +0,0 @@ -#!/usr/bin/python - -""" -Script that takes a file (output by wireshark/tshark, in JSON format) with DNS traffic -and constructs a map (dictionary) in which a hostname points to a set that contains the -IP addresses that is associated with that hostname. -""" - -import sys -import json -from collections import defaultdict -from decimal import * - -ROUTER_MAC = "b0:b9:8a:73:69:8e" - -JSON_KEY_SOURCE = "_source" -JSON_KEY_LAYERS = "layers" -JSON_KEY_DNS = "dns" -JSON_KEY_QUERIES = "Queries" -JSON_KEY_ANSWERS = "Answers" -JSON_KEY_DNS_RESP_TYPE = "dns.resp.type" -JSON_KEY_DNS_A = "dns.a" # Key for retrieving IP. 'a' for type A DNS record. -JSON_KEY_DNS_RESP_NAME = "dns.resp.name" -JSON_KEY_DNS_CNAME = "dns.cname" -JSON_KEY_ETH = "eth" -JSON_KEY_ETH_DST = "eth.dst" -JSON_KEY_FRAME = "frame" -JSON_KEY_FRAME_TIME_EPOCH = "frame.time_epoch" - -def main(): - if len(sys.argv) < 2: - print "Usage: python", sys.argv[0], "input_file" - return - mac_to_ddm = parse_json_dns(sys.argv[1]) - for mac in mac_to_ddm: - ddm = mac_to_ddm[mac] - ddm.print_mappings() - # maps_tuple = parse_json_dns(sys.argv[1]) - - # # print hostname to ip map - # hn_ip_map = maps_tuple[0] - # for hn in hn_ip_map.keys(): - # print "=====================================================================" - # print hn, "maps to:" - # for ip in hn_ip_map[hn]: - # print " -", ip - # print "=====================================================================" - - # print " " - - # # print ip to hostname map - # ip_hn_map = maps_tuple[1] - # for ip in ip_hn_map.keys(): - # print "=====================================================================" - # print ip, "maps to:" - # for hn in ip_hn_map[ip]: - # print " -", hn - # print "=====================================================================" - -class DeviceDNSMap: - def __init__(self, mac_address): - # MAC address of device - self.mac = mac_address - # Maps an external IP to a list of (timestamp,hostname) tuples. - # Entries in the list should be interpreted as follows: - # the timestamp indicates WHEN this device mapped the given ip (key in dict) to the hostname. - self.ip_mappings = defaultdict(list) - - def hostname_for_ip_at_time(self, ip, timestamp): - # Does device have a mapping for the given IP? - if not ip in self.ip_mappings: - return None - if not self.ip_mappings[ip]: - # If list of (timestamp,hostname) tuples is empty, there is no mapping to report. - return None - # Best fit mapping: the mapping immediately BEFORE timestamp parameter. - # Start with random pick (element 0). - best_fit = self.ip_mappings[ip][0] - for t in self.ip_mappings[ip]: - # t is a (timestamp,hostname) tuple - if t[0] < timestamp and t[0] > best_fit[0]: - # t is a better fit if it happened BEFORE the input timestamp - # and is LATER than the current best_fit - best_fit = t - # return the matching hostname - return best_fit[1] - - def add_mapping(self, ip, timestamp_hostname_tuple): - self.ip_mappings[ip].append(timestamp_hostname_tuple) - - def print_mappings(self): - count = 0 - print "### Mappings for MAC = ", self.mac, "###" - for ip in self.ip_mappings: - print "--- IP ", ip, " maps to: ---" - for t in self.ip_mappings[ip]: - print t[1], "at epoch time =", t[0] - count += 1 - print "### Total of", count, "mappings for", self.mac, "###" - - # -------------------------------------------------------------------------- - # Define eq and hash such that instances of the class can be used as keys in dictionaries. - # Equality is based on MAC as a MAC uniquely identifies the device. - def __eq__(self, another): - return hasattr(another, 'mac') and self.mac == another.mac - def __hash__(self): - return hash(self.data) - # -------------------------------------------------------------------------- - - -def parse_json_dns(file_path): - # Our end output: dictionary of MAC addresses with DeviceDNSMaps as values. - # Each DeviceDNSMap contains DNS lookups performed by the device with the corresponding MAC. - result = defaultdict() - with open(file_path) as jf: - # Read JSON. - # data becomes reference to root JSON object (or in our case json array) - data = json.load(jf) - # Loop through json objects in data - # Each entry is a pcap entry (request/response (packet) and associated metadata) - for p in data: - # p is a JSON object, not an index - # Drill down to DNS part: _source->layers->dns - layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS] - dns = layers.get(JSON_KEY_DNS, None) - # Skip any non DNS traffic - if dns is None: - print "[ WARNING: Non DNS traffic ]" - continue - # We only care about DNS responses as these also contain a copy of the query that they answer - answers = dns.get(JSON_KEY_ANSWERS, None) - if answers is None: - continue - ## Now that we know that it is an answer, the queries should also be available. - queries = dns.get(JSON_KEY_QUERIES) - if len(queries.keys()) > 1: - # Unclear if script will behave correctly for DNS lookups with multiple queries - print "[ WARNING: Multi query DNS lookup ]" - # Get ethernet information for identifying the device performing the DNS lookup. - eth = layers.get(JSON_KEY_ETH, None) - if eth is None: - print "[ WARNING: eth data not found ]" - continue - # As this is a response to a DNS query, the IoT device is the destination. - # Get the device MAC of that device. - device_mac = eth.get(JSON_KEY_ETH_DST, None) - if device_mac is None: - print "[ WARNING: eth.dst data not found ]" - continue - # Get the router's timestamp for this packet - # so that we can mark when the DNS mapping occurred - timestamp = Decimal(layers[JSON_KEY_FRAME][JSON_KEY_FRAME_TIME_EPOCH]) - for ak in answers.keys(): - a = answers[ak] - # We are looking for type A records as these are the ones that contain the IP. - # Type A == type 1 - if a[JSON_KEY_DNS_RESP_TYPE] == "1": - # get the IP - ip = a[JSON_KEY_DNS_A] - # The answer may be the canonical name. - # Now trace back the answer stack, looking for any higher level aliases. - hostname = find_alias_hostname(answers, a[JSON_KEY_DNS_RESP_NAME]) - # Create the tuple that indicates WHEN the ip to hostname mapping occurred - timestamp_hostname_tuple = (timestamp,hostname) - if device_mac in result: - # If we already have DNS data for the device with this MAC: - # Add the mapping to the DeviceDNSMap that is already present in the dict. - result[device_mac].add_mapping(ip, timestamp_hostname_tuple) - else: - # No DNS data for this device yet: - # Create a new DeviceDNSMap, add the mapping, and at it to the dict. - ddm = DeviceDNSMap(device_mac) - ddm.add_mapping(ip, timestamp_hostname_tuple) - result[device_mac] = ddm - return result - -# Recursively traverse set of answers trying to find the top most alias for a canonical name -def find_alias_hostname(answers, hostname): - for ak in answers.keys(): - a = answers[ak] - cname = a.get(JSON_KEY_DNS_CNAME, None) - # We only care about type=CNAME records - if cname is None: - continue - if cname == hostname: - # Located the right answer, perform recursive search for higher level aliases. - return find_alias_hostname(answers, a[JSON_KEY_DNS_RESP_NAME]) - return hostname - -if __name__ == '__main__': - main() - -# ================================================================================================ -# Notes/brainstorming how to do ip to host mappings. - -# Maps IPs to hostnames. Uses a dictionary of dictionaries. -# IP lookup in the outer dictionary returns a dictionary that has hostnames as keys. -# Looking up a hostname in the inner dictionary returns a set of timestamps. -# Each timestamp indicate the time at which the IP<->hostname mapping was determined by a DNS query. -# Note that the keyset of the inner dictionary will be of size 1 in most cases. -# When this is the case, the value (the set of timestamps) can be ignored. -# The values are only relevant when one IP maps to more than 1 hostname. -# When this the case, the timestamps must be considered to find the most recent mapping. -# ip_host_mappings = defaultdict(defaultdict(set)) - -# ================================================================================================ \ No newline at end of file diff --git a/parse_packet_frequency.py b/parse_packet_frequency.py deleted file mode 100644 index 5c26679..0000000 --- a/parse_packet_frequency.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/python - -""" -Script that takes a file (output by wireshark/tshark, in JSON format) and analyze -the traffic frequency of a certain device at a certain time. -""" - -import sys -import json -from collections import defaultdict -from dateutil import parser - -JSON_KEY_SOURCE = "_source" -JSON_KEY_LAYERS = "layers" - -JSON_KEY_ETH = "eth" -JSON_KEY_ETH_DST = "eth.dst" -JSON_KEY_ETH_SRC = "eth.src" -JSON_KEY_FRAME = "frame" -JSON_KEY_FRAME_TIME = "frame.time" - - -def save_to_file(tbl_header, dictionary, filename_out): - """ Show summary of statistics of PCAP file - Args: - tbl_header: header for the saved table - dictionary: dictionary to be saved - filename_out: file name to save - """ - # Appending, not overwriting! - f = open(filename_out, 'a') - # Write the table header - f.write("\n\n" + str(tbl_header) + "\n"); - # Iterate over dictionary and write (key, value) pairs - #for key, value in dictionary.iteritems(): - for key in sorted(dictionary): - f.write(str(key) + ", " + str(dictionary[key]) + "\n") - - f.close() - print "Writing output to file: ", filename_out - - -def main(): - """ Main function - """ - if len(sys.argv) < 5: - print "Usage: python", sys.argv[0], " " - return - # Parse the file for the specified MAC address - time_freq = parse_json(sys.argv[1], sys.argv[4]) - # Write statistics into file - save_to_file(sys.argv[3], time_freq, sys.argv[2]) - print "=====================================================================" - #for time in time_freq.keys(): - for key in sorted(time_freq): - print key, " => ", time_freq[key] - print "=====================================================================" - - -# Convert JSON file containing DNS traffic to a map in which a hostname points to its set of associated IPs. -def parse_json(file_path, mac_address): - """ Show summary of statistics of PCAP file - Args: - file_path: path of the read file - mac_address: MAC address of a device to analyze - """ - # Maps timestamps to frequencies of packets - time_freq = dict() - with open(file_path) as jf: - # Read JSON. - # data becomes reference to root JSON object (or in our case json array) - data = json.load(jf) - # Loop through json objects in data - # Each entry is a pcap entry (request/response (packet) and associated metadata) - for p in data: - # p is a JSON object, not an index - layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS] - # Get timestamp - frame = layers.get(JSON_KEY_FRAME, None) - date_time = frame.get(JSON_KEY_FRAME_TIME, None) - # Get into the Ethernet address part - eth = layers.get(JSON_KEY_ETH, None) - # Skip any non DNS traffic - if eth is None: - print "[ WARNING: Packet has no ethernet address! ]" - continue - # Get source and destination MAC addresses - src = eth.get(JSON_KEY_ETH_SRC, None) - dst = eth.get(JSON_KEY_ETH_DST, None) - # Get just the time part - date_time_obj = parser.parse(date_time) - # Remove the microsecond part - time_str = str(date_time_obj.time())[:8] - print str(time_str) + " - src:" + str(src) + " - dest:" + str(dst) - # Get and count the traffic for the specified MAC address - if src == mac_address or dst == mac_address: - # Check if timestamp already exists in the map - # If yes, then just increment the frequency value... - if time_str in time_freq: - time_freq[time_str] = time_freq[time_str] + 1 - else: # If not, then put the value one there - time_freq[time_str] = 1 - return time_freq - -if __name__ == '__main__': - main() - diff --git a/parser/parse_dns.py b/parser/parse_dns.py new file mode 100644 index 0000000..831b075 --- /dev/null +++ b/parser/parse_dns.py @@ -0,0 +1,206 @@ +#!/usr/bin/python + +""" +Script that takes a file (output by wireshark/tshark, in JSON format) with DNS traffic +and constructs a map (dictionary) in which a hostname points to a set that contains the +IP addresses that is associated with that hostname. +""" + +import sys +import json +from collections import defaultdict +from decimal import * + +ROUTER_MAC = "b0:b9:8a:73:69:8e" + +JSON_KEY_SOURCE = "_source" +JSON_KEY_LAYERS = "layers" +JSON_KEY_DNS = "dns" +JSON_KEY_QUERIES = "Queries" +JSON_KEY_ANSWERS = "Answers" +JSON_KEY_DNS_RESP_TYPE = "dns.resp.type" +JSON_KEY_DNS_A = "dns.a" # Key for retrieving IP. 'a' for type A DNS record. +JSON_KEY_DNS_RESP_NAME = "dns.resp.name" +JSON_KEY_DNS_CNAME = "dns.cname" +JSON_KEY_ETH = "eth" +JSON_KEY_ETH_DST = "eth.dst" +JSON_KEY_FRAME = "frame" +JSON_KEY_FRAME_TIME_EPOCH = "frame.time_epoch" + +def main(): + if len(sys.argv) < 2: + print "Usage: python", sys.argv[0], "input_file" + return + mac_to_ddm = parse_json_dns(sys.argv[1]) + for mac in mac_to_ddm: + ddm = mac_to_ddm[mac] + ddm.print_mappings() + # maps_tuple = parse_json_dns(sys.argv[1]) + + # # print hostname to ip map + # hn_ip_map = maps_tuple[0] + # for hn in hn_ip_map.keys(): + # print "=====================================================================" + # print hn, "maps to:" + # for ip in hn_ip_map[hn]: + # print " -", ip + # print "=====================================================================" + + # print " " + + # # print ip to hostname map + # ip_hn_map = maps_tuple[1] + # for ip in ip_hn_map.keys(): + # print "=====================================================================" + # print ip, "maps to:" + # for hn in ip_hn_map[ip]: + # print " -", hn + # print "=====================================================================" + +class DeviceDNSMap: + def __init__(self, mac_address): + # MAC address of device + self.mac = mac_address + # Maps an external IP to a list of (timestamp,hostname) tuples. + # Entries in the list should be interpreted as follows: + # the timestamp indicates WHEN this device mapped the given ip (key in dict) to the hostname. + self.ip_mappings = defaultdict(list) + + def hostname_for_ip_at_time(self, ip, timestamp): + # Does device have a mapping for the given IP? + if not ip in self.ip_mappings: + return None + if not self.ip_mappings[ip]: + # If list of (timestamp,hostname) tuples is empty, there is no mapping to report. + return None + # Best fit mapping: the mapping immediately BEFORE timestamp parameter. + # Start with random pick (element 0). + best_fit = self.ip_mappings[ip][0] + for t in self.ip_mappings[ip]: + # t is a (timestamp,hostname) tuple + if t[0] < timestamp and t[0] > best_fit[0]: + # t is a better fit if it happened BEFORE the input timestamp + # and is LATER than the current best_fit + best_fit = t + # return the matching hostname + return best_fit[1] + + def add_mapping(self, ip, timestamp_hostname_tuple): + self.ip_mappings[ip].append(timestamp_hostname_tuple) + + def print_mappings(self): + count = 0 + print "### Mappings for MAC = ", self.mac, "###" + for ip in self.ip_mappings: + print "--- IP ", ip, " maps to: ---" + for t in self.ip_mappings[ip]: + print t[1], "at epoch time =", t[0] + count += 1 + print "### Total of", count, "mappings for", self.mac, "###" + + # -------------------------------------------------------------------------- + # Define eq and hash such that instances of the class can be used as keys in dictionaries. + # Equality is based on MAC as a MAC uniquely identifies the device. + def __eq__(self, another): + return hasattr(another, 'mac') and self.mac == another.mac + def __hash__(self): + return hash(self.data) + # -------------------------------------------------------------------------- + + +def parse_json_dns(file_path): + # Our end output: dictionary of MAC addresses with DeviceDNSMaps as values. + # Each DeviceDNSMap contains DNS lookups performed by the device with the corresponding MAC. + result = defaultdict() + with open(file_path) as jf: + # Read JSON. + # data becomes reference to root JSON object (or in our case json array) + data = json.load(jf) + # Loop through json objects in data + # Each entry is a pcap entry (request/response (packet) and associated metadata) + for p in data: + # p is a JSON object, not an index + # Drill down to DNS part: _source->layers->dns + layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS] + dns = layers.get(JSON_KEY_DNS, None) + # Skip any non DNS traffic + if dns is None: + print "[ WARNING: Non DNS traffic ]" + continue + # We only care about DNS responses as these also contain a copy of the query that they answer + answers = dns.get(JSON_KEY_ANSWERS, None) + if answers is None: + continue + ## Now that we know that it is an answer, the queries should also be available. + queries = dns.get(JSON_KEY_QUERIES) + if len(queries.keys()) > 1: + # Unclear if script will behave correctly for DNS lookups with multiple queries + print "[ WARNING: Multi query DNS lookup ]" + # Get ethernet information for identifying the device performing the DNS lookup. + eth = layers.get(JSON_KEY_ETH, None) + if eth is None: + print "[ WARNING: eth data not found ]" + continue + # As this is a response to a DNS query, the IoT device is the destination. + # Get the device MAC of that device. + device_mac = eth.get(JSON_KEY_ETH_DST, None) + if device_mac is None: + print "[ WARNING: eth.dst data not found ]" + continue + # Get the router's timestamp for this packet + # so that we can mark when the DNS mapping occurred + timestamp = Decimal(layers[JSON_KEY_FRAME][JSON_KEY_FRAME_TIME_EPOCH]) + for ak in answers.keys(): + a = answers[ak] + # We are looking for type A records as these are the ones that contain the IP. + # Type A == type 1 + if a[JSON_KEY_DNS_RESP_TYPE] == "1": + # get the IP + ip = a[JSON_KEY_DNS_A] + # The answer may be the canonical name. + # Now trace back the answer stack, looking for any higher level aliases. + hostname = find_alias_hostname(answers, a[JSON_KEY_DNS_RESP_NAME]) + # Create the tuple that indicates WHEN the ip to hostname mapping occurred + timestamp_hostname_tuple = (timestamp,hostname) + if device_mac in result: + # If we already have DNS data for the device with this MAC: + # Add the mapping to the DeviceDNSMap that is already present in the dict. + result[device_mac].add_mapping(ip, timestamp_hostname_tuple) + else: + # No DNS data for this device yet: + # Create a new DeviceDNSMap, add the mapping, and at it to the dict. + ddm = DeviceDNSMap(device_mac) + ddm.add_mapping(ip, timestamp_hostname_tuple) + result[device_mac] = ddm + return result + +# Recursively traverse set of answers trying to find the top most alias for a canonical name +def find_alias_hostname(answers, hostname): + for ak in answers.keys(): + a = answers[ak] + cname = a.get(JSON_KEY_DNS_CNAME, None) + # We only care about type=CNAME records + if cname is None: + continue + if cname == hostname: + # Located the right answer, perform recursive search for higher level aliases. + return find_alias_hostname(answers, a[JSON_KEY_DNS_RESP_NAME]) + return hostname + +if __name__ == '__main__': + main() + +# ================================================================================================ +# Notes/brainstorming how to do ip to host mappings. + +# Maps IPs to hostnames. Uses a dictionary of dictionaries. +# IP lookup in the outer dictionary returns a dictionary that has hostnames as keys. +# Looking up a hostname in the inner dictionary returns a set of timestamps. +# Each timestamp indicate the time at which the IP<->hostname mapping was determined by a DNS query. +# Note that the keyset of the inner dictionary will be of size 1 in most cases. +# When this is the case, the value (the set of timestamps) can be ignored. +# The values are only relevant when one IP maps to more than 1 hostname. +# When this the case, the timestamps must be considered to find the most recent mapping. +# ip_host_mappings = defaultdict(defaultdict(set)) + +# ================================================================================================ \ No newline at end of file diff --git a/parser/parse_packet_frequency.py b/parser/parse_packet_frequency.py new file mode 100644 index 0000000..5c26679 --- /dev/null +++ b/parser/parse_packet_frequency.py @@ -0,0 +1,107 @@ +#!/usr/bin/python + +""" +Script that takes a file (output by wireshark/tshark, in JSON format) and analyze +the traffic frequency of a certain device at a certain time. +""" + +import sys +import json +from collections import defaultdict +from dateutil import parser + +JSON_KEY_SOURCE = "_source" +JSON_KEY_LAYERS = "layers" + +JSON_KEY_ETH = "eth" +JSON_KEY_ETH_DST = "eth.dst" +JSON_KEY_ETH_SRC = "eth.src" +JSON_KEY_FRAME = "frame" +JSON_KEY_FRAME_TIME = "frame.time" + + +def save_to_file(tbl_header, dictionary, filename_out): + """ Show summary of statistics of PCAP file + Args: + tbl_header: header for the saved table + dictionary: dictionary to be saved + filename_out: file name to save + """ + # Appending, not overwriting! + f = open(filename_out, 'a') + # Write the table header + f.write("\n\n" + str(tbl_header) + "\n"); + # Iterate over dictionary and write (key, value) pairs + #for key, value in dictionary.iteritems(): + for key in sorted(dictionary): + f.write(str(key) + ", " + str(dictionary[key]) + "\n") + + f.close() + print "Writing output to file: ", filename_out + + +def main(): + """ Main function + """ + if len(sys.argv) < 5: + print "Usage: python", sys.argv[0], " " + return + # Parse the file for the specified MAC address + time_freq = parse_json(sys.argv[1], sys.argv[4]) + # Write statistics into file + save_to_file(sys.argv[3], time_freq, sys.argv[2]) + print "=====================================================================" + #for time in time_freq.keys(): + for key in sorted(time_freq): + print key, " => ", time_freq[key] + print "=====================================================================" + + +# Convert JSON file containing DNS traffic to a map in which a hostname points to its set of associated IPs. +def parse_json(file_path, mac_address): + """ Show summary of statistics of PCAP file + Args: + file_path: path of the read file + mac_address: MAC address of a device to analyze + """ + # Maps timestamps to frequencies of packets + time_freq = dict() + with open(file_path) as jf: + # Read JSON. + # data becomes reference to root JSON object (or in our case json array) + data = json.load(jf) + # Loop through json objects in data + # Each entry is a pcap entry (request/response (packet) and associated metadata) + for p in data: + # p is a JSON object, not an index + layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS] + # Get timestamp + frame = layers.get(JSON_KEY_FRAME, None) + date_time = frame.get(JSON_KEY_FRAME_TIME, None) + # Get into the Ethernet address part + eth = layers.get(JSON_KEY_ETH, None) + # Skip any non DNS traffic + if eth is None: + print "[ WARNING: Packet has no ethernet address! ]" + continue + # Get source and destination MAC addresses + src = eth.get(JSON_KEY_ETH_SRC, None) + dst = eth.get(JSON_KEY_ETH_DST, None) + # Get just the time part + date_time_obj = parser.parse(date_time) + # Remove the microsecond part + time_str = str(date_time_obj.time())[:8] + print str(time_str) + " - src:" + str(src) + " - dest:" + str(dst) + # Get and count the traffic for the specified MAC address + if src == mac_address or dst == mac_address: + # Check if timestamp already exists in the map + # If yes, then just increment the frequency value... + if time_str in time_freq: + time_freq[time_str] = time_freq[time_str] + 1 + else: # If not, then put the value one there + time_freq[time_str] = 1 + return time_freq + +if __name__ == '__main__': + main() +