4 Script that constructs a graph in which hosts are nodes.
5 An edge between two hosts indicates that the hosts communicate.
6 Hosts are labeled and identified by their IPs.
7 The graph is written to a file in Graph Exchange XML format for later import and visual inspection in Gephi.
9 The input to this script is the JSON produced by Anastasia Shuba's extract_from_tshark.py.
11 This script is a simplification of Milad Asgari's parser_data_to_gephi.py script.
12 It serves as a baseline for future scripts that want to include more information in the graph.
23 def parse_json(file_path):
# Opens the JSON file at file_path and, for each record it visits, connects the
# record's "src_ip" and "dst_ip" values with an edge in graph G.
# NOTE(review): the statements that create G, load `data`, define the loop
# variable `k`, and return a result are not visible in this view -- confirm
# them against the full file before relying on this description.
# The DNS maps are read from a hard-coded path relative to the working directory.
25 maps_tuple = parse_dns.parse_json_dns("./dns.json")
# Judging by the names, element 0 maps hostname -> IP and element 1 maps
# IP -> hostname(s); verify against parse_dns.parse_json_dns.
# NOTE(review): hn_ip_map is not used in any line visible here.
26 hn_ip_map = maps_tuple[0]
27 ip_hn_map = maps_tuple[1]
31 with open(file_path) as jf:
33 # data refers to the root JSON value (in our case a JSON array).
35 # Loop through the JSON objects in data.
38 # Fetch the source and destination IPs.
39 # Each of these becomes a node in the graph.
40 src_ip = data[k]["src_ip"]
41 dst_ip = data[k]["dst_ip"]
# Only destinations that have an entry in ip_hn_map take this branch.
43 if dst_ip in ip_hn_map:
44 # Hack to get the first element of the set: start iterating over it.
# NOTE(review): the loop body is not visible here; presumably it uses `e`
# and stops after the first element -- confirm.
45 for e in ip_hn_map[dst_ip]:
49 ''' Graph construction '''
50 # No need to check whether the nodes and/or edges we add already exist:
51 # NetworkX won't add already existing nodes/edges (except in the case of a MultiGraph or MultiDiGraph; see the NetworkX docs).
52 # Add a node for each host.
55 # Connect these two nodes.
56 G.add_edge(src_ip, dst_ip)
59 # ------------------------------------------------------
61 # Might be useful later on if we wish to resolve IPs.
63 ext_result = tldextract.extract(str(host))
64 # Be consistent with ReCon and keep suffix
65 domain = ext_result.domain + "." + ext_result.suffix
70 socket.inet_aton(addr)
74 # ------------------------------------------------------
76 if __name__ == '__main__':
# Script entry point: takes the input and output paths from the command line,
# builds the graph with parse_json and writes it out with nx.write_gexf.
# The print statements below use Python 2 statement syntax.
# NOTE(review): the condition that guards the usage message (and any early
# exit after it) is not visible in this view -- confirm against the full file.
78 print "Usage:", sys.argv[0], "input_file output_file"
# NOTE(review): "outfile_file" in the message below looks like a typo for
# "output_file"; it is a runtime string, so it is left unchanged here.
79 print "outfile_file should end in .gexf"
81 # Input file: path to the JSON file generated from tshark JSON output using Anastasia's script (extract_from_tshark.py).
82 input_file = sys.argv[1]
83 print "[ input_file =", input_file, "]"
84 # Output file: path to the file where the Gephi XML should be written.
85 output_file = sys.argv[2]
86 print "[ output_file =", output_file, "]"
87 # Construct the graph from the JSON input.
88 G = parse_json(input_file)
89 # Write the graph in Graph Exchange XML (GEXF) format.
90 nx.write_gexf(G, output_file)