'''
Script that constructs a graph in which hosts are nodes.
An edge between two hosts indicates that the hosts communicate.
Hosts are labeled and identified by their IPs.
The graph is written to a file in Graph Exchange XML format for later import and visual inspection in Gephi.

The input to this script is the JSON output by extract_from_tshark.py by Anastasia Shuba.

This script is a simplification of Milad Asgari's parser_data_to_gephi.py script.
It serves as a baseline for future scripts that want to include more information in the graph.
'''
22 import parser.parse_dns
# Keys under which each packet entry of the input JSON stores its
# Ethernet (MAC) source and destination addresses.
JSON_KEY_ETH_SRC = "eth.src"
JSON_KEY_ETH_DST = "eth.dst"
def _add_local_edge(G, eth_src, eth_dst):
    """Add an eth.src -> eth.dst edge; add_edge creates missing endpoint nodes."""
    G.add_edge(eth_src, eth_dst)


def parse_json(file_path, dns_file_path="./json/dns.json"):
    """Build a host communication graph from a packet JSON file.

    Each packet entry is read for the keys "ts", "eth.src", "eth.dst",
    "src_ip" and "dst_ip". The IoT device of a packet is whichever of the
    two MACs appears in the DNS mappings; the other endpoint is labeled with
    the hostname the device resolved for the server IP at the packet's time.

    Args:
        file_path: Path to the packet JSON file (array or object of packets).
        dns_file_path: Path to the DNS JSON handed to
            parser.parse_dns.parse_json_dns. Defaults to the location the
            script has always used.

    Returns:
        The constructed networkx graph.
    """
    # Function-scope import so this block does not depend on the (not fully
    # visible) module import section.
    import json

    device_dns_mappings = parser.parse_dns.parse_json_dns(dns_file_path)

    # NOTE(review): a directed graph is assumed because edge direction differs
    # for outbound vs. inbound traffic below -- confirm against the original.
    G = nx.DiGraph()

    with open(file_path) as jf:
        # data becomes reference to root JSON object (or in our case json array)
        data = json.load(jf)

    # Accept both a JSON array of packets and a JSON object keyed by packet id.
    packets = data.values() if isinstance(data, dict) else data

    # Loop through json objects in data
    for packet in packets:
        # Fetch timestamp of packet
        packet_timestamp = Decimal(packet["ts"])
        # Fetch eth source and destination info
        eth_src = packet[JSON_KEY_ETH_SRC]
        eth_dst = packet[JSON_KEY_ETH_DST]

        # Traffic can be both outbound and inbound.
        # Determine which one of the two by looking up device MAC in DNS map.
        if eth_src in device_dns_mappings:
            iot_device = eth_src
        elif eth_dst in device_dns_mappings:
            iot_device = eth_dst
        else:
            print("[ WARNING: DNS mapping not found for device with MAC %s OR %s ]"
                  % (eth_src, eth_dst))
            # This must be local communication between two IoT devices OR an
            # IoT device talking to a hardcoded IP.
            # For now let's assume local communication.
            # TODO add regex check on src+dst IP to figure out if hardcoded
            # server IP (e.g. check if one of the two are NOT a 192.168.x.y IP)
            _add_local_edge(G, eth_src, eth_dst)
            continue

        # It is outbound traffic if iot_device matches src, otherwise it must
        # be inbound traffic.
        outbound_traffic = iot_device == eth_src

        # Graph construction.
        # No need to check if the nodes/edges we add already exist: NetworkX
        # won't add already existing nodes/edges (except in the case of a
        # MultiGraph or MultiDiGraph (see NetworkX doc)).
        G.add_node(iot_device)

        # For outbound traffic, the server's IP is the destination IP.
        # For inbound traffic, the server's IP is the source IP.
        server_ip = packet["dst_ip"] if outbound_traffic else packet["src_ip"]
        hostname = device_dns_mappings[iot_device].hostname_for_ip_at_time(
            server_ip, packet_timestamp)

        # NOTE(review): assumes hostname_for_ip_at_time returns None when no
        # mapping exists -- confirm against parser.parse_dns.
        if hostname is None:
            # TODO this can occur when two local devices communicate OR if the
            # IoT device has a hardcoded server IP. A regex check early in the
            # loop for local-to-local traffic would remove this corner case.
            print("[ WARNING: no ip-hostname mapping found for ip %s"
                  "  -- adding eth.src->eth.dst edge, but note that this may be"
                  " incorrect if IoT device has hardcoded server IP ]" % server_ip)
            _add_local_edge(G, eth_src, eth_dst)
            continue

        # Connect the device and the server; the edge points in the direction
        # the packet travelled.
        if outbound_traffic:
            G.add_edge(iot_device, hostname)
        else:
            G.add_edge(hostname, iot_device)

    return G
95 # ------------------------------------------------------
97 # Might be useful later on if we wish to resolve IPs.
99 ext_result = tldextract.extract(str(host))
100 # Be consistent with ReCon and keep suffix
101 domain = ext_result.domain + "." + ext_result.suffix
106 socket.inet_aton(addr)
110 # ------------------------------------------------------
if __name__ == '__main__':
    # Both positional arguments are required; stop before indexing sys.argv.
    if len(sys.argv) < 3:
        print("Usage: %s input_file output_file" % sys.argv[0])
        print("output_file should end in .gexf")
        # Non-zero exit status signals the usage error to the caller.
        sys.exit(1)
    # Input file: Path to JSON file generated from tshark JSON output using
    # Anastasia's script (extract_from_tshark.py).
    input_file = sys.argv[1]
    print("[ input_file = %s ]" % input_file)
    # Output file: Path to file where the Gephi XML should be written.
    output_file = sys.argv[2]
    print("[ output_file = %s ]" % output_file)
    # Construct graph from JSON
    G = parse_json(input_file)
    # Write Graph in Graph Exchange XML format
    nx.write_gexf(G, output_file)