4 Extension of base_gefx_generator.py.
5 This script constructs a bipartite graph with IoT devices on one side and Internet hosts on the other side.
6 As a result, this graph does NOT show inter IoT device communication.
The input to this script is Wireshark's/tshark's JSON representation of a packet trace.
17 from networkx.algorithms import bipartite
24 import parser.parse_dns
# CSV file (no header row is assumed by the reader) mapping a device MAC
# address to a human-readable device name.
DEVICE_MAC_LIST = "devicelist.dat"
COLUMN_MAC = "MAC_address"
COLUMN_DEVICE_NAME = "device_name"

# Keys used to navigate one packet object of the tshark JSON dump.
JSON_KEY_SOURCE = "_source"
JSON_KEY_LAYERS = "layers"
JSON_KEY_FRAME = "frame"
JSON_KEY_FRAME_TIME_EPOCH = "frame.time_epoch"

# Link-layer and network-layer keys.
# NOTE(review): JSON_KEY_ETH / JSON_KEY_IP / JSON_KEY_UDP / JSON_KEY_TCP are
# referenced by parse_json() but were missing from this table; the values below
# follow the layer naming implied by "eth.src" / "ip.src" -- confirm against a
# sample dump.
JSON_KEY_ETH = "eth"
JSON_KEY_ETH_SRC = "eth.src"
JSON_KEY_ETH_DST = "eth.dst"
JSON_KEY_IP = "ip"
JSON_KEY_IP_SRC = "ip.src"
JSON_KEY_IP_DST = "ip.dst"
JSON_KEY_UDP = "udp"
JSON_KEY_TCP = "tcp"

# Layers whose presence causes parse_json() to skip the packet.
JSON_KEY_MDNS = "mdns"
JSON_KEY_BOOTP = "bootp"
JSON_KEY_SSDP = "ssdp"
JSON_KEY_DHCPV6 = "dhcpv6"
JSON_KEY_LLMNR = "llmnr"
def _resolve_remote_host(device_dns_mappings, device_mac, remote_ip, timestamp):
    """Return the hostname ``device_mac`` resolved for ``remote_ip``, else the IP.

    Prints a warning when the device has no entry in the DNS query map.
    """
    hostname = None
    # Guard against cases where a device does not perform DNS lookups (or the
    # lookups occur before data collection starts).
    if device_mac in device_dns_mappings:
        hostname = device_dns_mappings[device_mac].hostname_for_ip_at_time(remote_ip, timestamp)
    else:
        print("[ WARNING: No entry for %s in DNS query map ]" % device_mac)
    # Use IP if no hostname mapping.
    # NOTE(review): assumes hostname_for_ip_at_time() returns None when it has
    # no mapping -- confirm against parser.parse_dns.
    return remote_ip if hostname is None else hostname


def parse_json(file_path):
    """Build the IoT-device <-> Internet-host graph from a tshark JSON trace.

    The device list (DEVICE_MAC_LIST) is read first to label IoT nodes, then
    the trace is parsed once for DNS resolutions and once more for the actual
    traffic. Discovery/DHCP traffic, non-TCP/UDP traffic, non-IP traffic and
    local-to-local traffic are ignored.

    Args:
        file_path: Path to the Wireshark/tshark JSON file.

    Returns:
        The graph. IoT devices are keyed by MAC address (with a ``Name``
        attribute and ``bipartite=0``); remote hosts are keyed by hostname, or
        by IP when no hostname is known (``bipartite=1``). One edge is added
        from the packet's source node to its destination node.

    Raises:
        KeyError: If the MAC address of a local endpoint is not present in the
            device list.
    """
    # Function-scope import so this block does not depend on a file-level
    # ``import json``.
    import json

    # Load the MAC -> device name table. Field names are passed explicitly, so
    # every row of the file is treated as data.
    with open(DEVICE_MAC_LIST) as csvfile:
        rows = csv.DictReader(csvfile, (COLUMN_MAC, COLUMN_DEVICE_NAME))
        devlist = {row[COLUMN_MAC]: row[COLUMN_DEVICE_NAME] for row in rows}

    # First parse the file once, constructing a map that contains information
    # about individual devices' DNS resolutions.
    device_dns_mappings = parser.parse_dns.parse_json_dns(file_path)

    # Layers we do not care about: discovery (mDNS, LLMNR, SSDP) and address
    # configuration (bootp/DHCP, DHCPv6) -- only actual communication matters.
    ignored_layers = (JSON_KEY_MDNS, JSON_KEY_LLMNR, JSON_KEY_SSDP,
                      JSON_KEY_BOOTP, JSON_KEY_DHCPV6)

    # Values for the 'bipartite' attribute of a node.
    # NOTE(review): 0 for the IoT side is assumed (NetworkX's usual 0/1
    # convention); only the web-server value was visible during review.
    bipartite_iot = 0
    bipartite_web_server = 1

    # NOTE(review): a directed graph is assumed because edges are added
    # source -> destination; confirm the intended graph class.
    G = nx.DiGraph()

    # Parse the file again, this time constructing the graph.
    with open(file_path) as jf:
        # data is the root JSON array; each element is one packet object.
        data = json.load(jf)

    for p in data:
        # Drill down to the object containing data from the different layers.
        layers = p[JSON_KEY_SOURCE][JSON_KEY_LAYERS]

        if any(key in layers for key in ignored_layers):
            continue
        # Skip any non udp/non tcp traffic.
        if JSON_KEY_UDP not in layers and JSON_KEY_TCP not in layers:
            continue
        # Skip any non IP traffic.
        if JSON_KEY_IP not in layers:
            continue

        # Timestamp of the packet (router's timestamp).
        packet_timestamp = Decimal(layers[JSON_KEY_FRAME][JSON_KEY_FRAME_TIME_EPOCH])

        # Source and destination MACs.
        eth = layers.get(JSON_KEY_ETH, None)
        if eth is None:
            print("[ WARNING: eth data not found ]")
            continue
        eth_src = eth.get(JSON_KEY_ETH_SRC, None)
        eth_dst = eth.get(JSON_KEY_ETH_DST, None)

        # Source and destination IPs.
        ip_src = layers[JSON_KEY_IP][JSON_KEY_IP_SRC]
        ip_dst = layers[JSON_KEY_IP][JSON_KEY_IP_DST]
        src_is_local = ip_src.startswith("192.168.")
        dst_is_local = ip_dst.startswith("192.168.")

        # Skip inter-IoT device communication.
        if src_is_local and dst_is_local:
            continue

        if src_is_local:
            G.add_node(eth_src, Name=devlist[eth_src], bipartite=bipartite_iot)
            src_node = eth_src
        else:
            # Inbound traffic: eth_dst is the MAC of the IoT device, so its DNS
            # history is what maps the remote source IP to a hostname.
            src_node = _resolve_remote_host(device_dns_mappings, eth_dst, ip_src, packet_timestamp)
            G.add_node(src_node, bipartite=bipartite_web_server)

        if dst_is_local:
            # Label the destination device with its OWN name (previously the
            # source MAC was looked up here, mislabelling the node or raising).
            G.add_node(eth_dst, Name=devlist[eth_dst], bipartite=bipartite_iot)
            dst_node = eth_dst
        else:
            # Outbound traffic: eth_src is the MAC of the IoT device.
            dst_node = _resolve_remote_host(device_dns_mappings, eth_src, ip_dst, packet_timestamp)
            G.add_node(dst_node, bipartite=bipartite_web_server)

        G.add_edge(src_node, dst_node)

    return G
if __name__ == '__main__':
    # Both the input trace and the output path are required; stop here rather
    # than failing on a missing argv entry below.
    if len(sys.argv) < 3:
        print("Usage: %s input_file output_file" % sys.argv[0])
        print("output_file should end in .gexf")
        sys.exit(1)

    # Input file: Path to Wireshark/tshark JSON file.
    input_file = sys.argv[1]
    print("[ input_file = %s ]" % input_file)
    # Output file: Path to file where the Gephi XML should be written.
    output_file = sys.argv[2]
    print("[ output_file = %s ]" % output_file)

    # Construct graph from JSON
    G = parse_json(input_file)
    # Write Graph in Graph Exchange XML format
    nx.write_gexf(G, output_file)