Script that takes a file (output by Wireshark/tshark, in JSON format) with DNS traffic
and constructs a map (dictionary) in which each hostname points to a set that contains the
IP addresses that are associated with that hostname.
import json
import sys
from collections import defaultdict
# Keys used to navigate the per-packet JSON objects of a Wireshark/tshark export.
# The DNS part of a packet is reached via: _source -> layers -> dns.
JSON_KEY_SOURCE = "_source"
JSON_KEY_LAYERS = "layers"
# Key of the DNS layer inside "layers"; parse_json() looks it up on every packet.
JSON_KEY_DNS = "dns"
# Sections of the DNS layer holding the queries and the answers of a response.
JSON_KEY_QUERIES = "Queries"
JSON_KEY_ANSWERS = "Answers"
# Fields of a single answer record.
JSON_KEY_DNS_RESP_TYPE = "dns.resp.type"
JSON_KEY_DNS_A = "dns.a" # Key for retrieving IP. 'a' for type A DNS record.
JSON_KEY_DNS_RESP_NAME = "dns.resp.name"
JSON_KEY_DNS_CNAME = "dns.cname"
def main():
    """Command-line entry point.

    Expects the path of the JSON input file as the first command-line
    argument, parses it with parse_json() and prints every hostname
    together with the IP addresses mapped to it.
    """
    # Without an input file there is nothing to parse: show usage and stop.
    if len(sys.argv) < 2:
        print("Usage: python %s input_file" % sys.argv[0])
        return
    separator = "====================================================================="
    hn_ip_map = parse_json(sys.argv[1])
    for hn in hn_ip_map:
        print(separator)
        # NOTE(review): the statements that print the hostname and each IP were
        # not visible in the reviewed listing -- confirm the exact output format.
        print(hn)
        for ip in hn_ip_map[hn]:
            print(ip)
        print(separator)
# Convert JSON file containing DNS traffic to a map in which a hostname points to its set of associated IPs.
def parse_json(file_path):
    """Build a hostname -> set-of-IPs map from a JSON packet export.

    Args:
        file_path: Path of the JSON file. Its root is iterated as a
            collection of packet objects, each carrying its layers under
            _source -> layers.

    Returns:
        A defaultdict(set) that maps each hostname to the set of IP
        addresses found in type A answer records for it. When an answer
        belongs to a canonical name, the IP is filed under the alias
        returned by find_alias_hostname().
    """
    # Maps hostnames to IPs
    host_ip_mappings = defaultdict(set)
    with open(file_path) as jf:
        # data becomes reference to root JSON object (or in our case json array)
        data = json.load(jf)
    # Each entry is a pcap entry (request/response (packet) and associated metadata)
    for p in data:
        # Drill down to DNS part: _source->layers->dns
        layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS]
        dns = layers.get(JSON_KEY_DNS)
        # Skip any non DNS traffic
        if dns is None:
            print("[ WARNING: Non DNS traffic ]")
            continue
        # We only care about DNS responses as these also contain a copy of the query that they answer
        answers = dns.get(JSON_KEY_ANSWERS)
        if answers is None:
            continue
        # The queries are expected to accompany the answers, but fall back to
        # an empty mapping so a response without them does not crash the run.
        queries = dns.get(JSON_KEY_QUERIES) or {}
        if len(queries) > 1:
            # Unclear if script will behave correctly for DNS lookups with multiple queries
            print("[ WARNING: Multi query DNS lookup ]")
        for a in answers.values():
            # We are looking for type A records as these are the ones that contain the IP.
            if a[JSON_KEY_DNS_RESP_TYPE] != "1":
                continue
            ip = a[JSON_KEY_DNS_A]
            # The answer may be the canonical name.
            # Now trace back the answer stack, looking for any higher level aliases.
            hostname = find_alias_hostname(answers, a[JSON_KEY_DNS_RESP_NAME])
            # Add mapping of hostname to ip to our data structure
            host_ip_mappings[hostname].add(ip)
    return host_ip_mappings
# Traverse set of answers trying to find the top most alias for a canonical name
def find_alias_hostname(answers, hostname):
    """Return the top-most alias that resolves, via CNAME records, to hostname.

    Args:
        answers: Mapping of answer keys to answer records (dicts) of one
            DNS response.
        hostname: Name to start from, typically the name an A record
            answers for.

    Returns:
        The name at the top of the CNAME chain that ends in hostname, or
        hostname itself when no CNAME record points at it.
    """
    # Names already visited; guards against CNAME cycles (a -> b, b -> a),
    # which would otherwise never terminate.
    seen = set([hostname])
    current = hostname
    while True:
        alias = None
        for a in answers.values():
            cname = a.get(JSON_KEY_DNS_CNAME)
            # We only care about type=CNAME records
            if cname is None:
                continue
            if cname == current:
                # Located the right answer, continue the search one level up.
                alias = a[JSON_KEY_DNS_RESP_NAME]
                break
        if alias is None or alias in seen:
            return current
        seen.add(alias)
        current = alias
90 if __name__ == '__main__':