"""
Script that takes a file (output by wireshark/tshark, in JSON format) with DNS traffic
and constructs a map (dictionary) in which a hostname points to a set that contains the
IP addresses that are associated with that hostname.
"""
import json
import sys
from collections import defaultdict
# Keys used to navigate the JSON that tshark emits for each captured packet.
# The DNS part of a packet is reached via _source -> layers -> dns.
JSON_KEY_SOURCE = "_source"
JSON_KEY_LAYERS = "layers"
JSON_KEY_DNS = "dns"  # DNS layer inside "layers"; looked up by parse_json_dns.
JSON_KEY_QUERIES = "Queries"
JSON_KEY_ANSWERS = "Answers"
JSON_KEY_DNS_RESP_TYPE = "dns.resp.type"
JSON_KEY_DNS_A = "dns.a"  # Key for retrieving IP. 'a' for type A DNS record.
JSON_KEY_DNS_RESP_NAME = "dns.resp.name"
JSON_KEY_DNS_CNAME = "dns.cname"
def _print_mapping(mapping):
    """Print every key of *mapping* followed by its associated values.

    Each entry is framed by separator lines; the key is printed first and
    every value of its set is printed indented beneath it.
    """
    separator = "====================================================================="
    for key in mapping:
        print(separator)
        print(key)
        for value in mapping[key]:
            print("    " + value)
        print(separator)


def main():
    """Parse the capture file named on the command line and print both maps.

    Reads the input path from ``sys.argv[1]``. When no path is supplied, a
    usage hint is printed and the function returns without doing any work.
    """
    if len(sys.argv) < 2:
        # Single-argument print() behaves identically on Python 2 and 3.
        print("Usage: python %s input_file" % sys.argv[0])
        return

    maps_tuple = parse_json_dns(sys.argv[1])

    # print hostname to ip map
    _print_mapping(maps_tuple[0])

    # print ip to hostname map
    _print_mapping(maps_tuple[1])
def parse_json_dns(file_path):
    """Convert a JSON file containing DNS traffic to a tuple with two maps.

    Args:
        file_path: Path to a JSON file (output by wireshark/tshark) whose
            root is an array with one object per captured packet.

    Returns:
        A tuple ``(host_ip_mappings, ip_host_mappings)``.
        Index 0 is a map in which a hostname points to its set of
        associated IPs. Index 1 is a map in which an IP points to its set
        of associated hostnames.
    """
    # Maps hostnames to IPs
    host_ip_mappings = defaultdict(set)
    # Maps ips to hostnames
    ip_host_mappings = defaultdict(set)

    with open(file_path) as jf:
        # data becomes reference to root JSON object (or in our case json array)
        data = json.load(jf)

    # Each entry is a pcap entry (request/response (packet) and associated metadata)
    for p in data:
        # Drill down to DNS part: _source->layers->dns
        layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS]
        dns = layers.get(JSON_KEY_DNS)
        # Skip any non DNS traffic
        if dns is None:
            print("[ WARNING: Non DNS traffic ]")
            continue

        # We only care about DNS responses as these also contain a copy of
        # the query that they answer
        answers = dns.get(JSON_KEY_ANSWERS)
        if answers is None:
            continue

        # Now that we know that it is an answer, the queries should also be
        # available; tolerate their absence instead of crashing.
        queries = dns.get(JSON_KEY_QUERIES)
        if queries is not None and len(queries) > 1:
            # Unclear if script will behave correctly for DNS lookups with multiple queries
            print("[ WARNING: Multi query DNS lookup ]")

        for a in answers.values():
            # We are looking for type A records as these are the ones that contain the IP.
            if a.get(JSON_KEY_DNS_RESP_TYPE) != "1":
                continue
            ip = a[JSON_KEY_DNS_A]
            # The answer may be the canonical name.
            # Now trace back the answer stack, looking for any higher level aliases.
            hostname = find_alias_hostname(answers, a[JSON_KEY_DNS_RESP_NAME])
            # Record the mapping in both directions.
            host_ip_mappings[hostname].add(ip)
            ip_host_mappings[ip].add(hostname)

    return (host_ip_mappings, ip_host_mappings)
def find_alias_hostname(answers, hostname):
    """Find the top-most alias for a canonical name in a set of DNS answers.

    Walks the CNAME records in *answers* backwards: whenever a record's
    canonical name equals the current name, the search continues from that
    record's response name (the alias pointing at it).

    Args:
        answers: Mapping of answer keys to answer records (dictionaries) as
            found under the "Answers" key of a DNS response.
        hostname: The name to start from, typically the response name of a
            type A record.

    Returns:
        The highest-level alias reachable from *hostname*, or *hostname*
        itself when no CNAME record points to it.
    """
    # Track visited names so that a malformed response containing a CNAME
    # cycle terminates instead of looping forever.
    seen = {hostname}
    current = hostname
    while True:
        # We only care about type=CNAME records; other records have no
        # cname entry and therefore never match.
        alias = next(
            (a[JSON_KEY_DNS_RESP_NAME] for a in answers.values()
             if a.get(JSON_KEY_DNS_CNAME) == current),
            None,
        )
        if alias is None or alias in seen:
            return current
        seen.add(alias)
        current = alias
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
# ================================================================================================
# Notes/brainstorming how to do ip to host mappings.
# Maps IPs to hostnames. Uses a dictionary of dictionaries.
# IP lookup in the outer dictionary returns a dictionary that has hostnames as keys.
# Looking up a hostname in the inner dictionary returns a set of timestamps.
# Each timestamp indicates the time at which the IP<->hostname mapping was determined by a DNS query.
# Note that the keyset of the inner dictionary will be of size 1 in most cases.
# When this is the case, the value (the set of timestamps) can be ignored.
# The values are only relevant when one IP maps to more than 1 hostname.
# When this is the case, the timestamps must be considered to find the most recent mapping.
# ip_host_mappings = defaultdict(lambda: defaultdict(set))
# ================================================================================================