From caca6e339810304780a414893c5ecf2802b4a275 Mon Sep 17 00:00:00 2001 From: Janus Varmarken Date: Wed, 25 Oct 2017 21:52:08 -0700 Subject: [PATCH] Add base_gefx_generator.py: script that constructs a .gefx file from JSON generated by extract_from_tshark.py. The script generates a graph in which nodes are hosts and edges indicate that there is communication between the hosts. The script label/identify hosts by their IPs. It should serve as a starting point when we want to include more information in the graphs (e.g. host name). --- base_gefx_generator.py | 75 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 base_gefx_generator.py diff --git a/base_gefx_generator.py b/base_gefx_generator.py new file mode 100644 index 0000000..fd2a55e --- /dev/null +++ b/base_gefx_generator.py @@ -0,0 +1,75 @@ +#!/usr/bin/python + +""" +Script that constructs a graph in which hosts are nodes. +An edge between two hosts indicate that the hosts communicate. +Hosts are labeled and identified by their IPs. +The graph is written to a file in Graph Exchange XML format for later import and visual inspection in Gephi. + +The input to this script is the JSON output by extract_from_tshark.py by Anastasia Shuba. + +This script is a simplification of Milad Asgari's parser_data_to_gephi.py script. +It serves as a baseline for future scripts that want to include more information in the graph. +""" + +import socket +import json +import tldextract +import networkx as nx +import sys + +def parse_json(file_path): + # Init empty graph + G = nx.DiGraph() + with open(file_path) as jf: + # Read JSON. + # data becomes reference to root JSON object (or in our case json array) + data = json.load(jf) + # Loop through json objects in data + for k in data: + # Fetch source and destination IPs. + # Each of these become a Node in the Graph. + src_ip = data[k]["src_ip"] + dst_ip = data[k]["dst_ip"] + ''' Graph construction ''' + # No need to check if the Nodes and/or Edges we add already exist: + # NetworkX won't add already existing nodes/edges (except in the case of a MultiGraph or MultiDiGraph (see NetworkX doc)). + # Add a node for each host. + G.add_node(src_ip) + G.add_node(dst_ip) + # Connect these two nodes. + G.add_edge(src_ip, dst_ip) + return G + +# ------------------------------------------------------ +# Not currently used. +# Might be useful later on if we wish to resolve IPs. +def get_domain(host): + ext_result = tldextract.extract(str(host)) + # Be consistent with ReCon and keep suffix + domain = ext_result.domain + "." + ext_result.suffix + return domain + +def is_IP(addr): + try: + socket.inet_aton(addr) + return True + except socket.error: + return False +# ------------------------------------------------------ + +if __name__ == '__main__': + if len(sys.argv) < 3: + print "Usage:", sys.argv[0], "input_file output_file" + print "outfile_file should end in .gexf" + sys.exit(0) + # Input file: Path to JSON file generated from tshark JSON output using Anastasia's script (extract_from_tshark.py). + input_file = sys.argv[1] + print "[ input_file =", input_file, "]" + # Output file: Path to file where the Gephi XML should be written. + output_file = sys.argv[2] + print "[ output_file =", output_file, "]" + # Construct graph from JSON + G = parse_json(input_file) + # Write Graph in Graph Exchange XML format + nx.write_gexf(G, output_file) -- 2.34.1