+ return dev_list
+
+
+def traverse_and_merge_nodes(G, dev_list_file):
+ """ Merge nodes that have similar properties, e.g. same protocols
+ But, we only do this for leaves (outer nodes), and not for
+ nodes that are in the middle/have many neighbors.
+ The pre-condition is that the node:
+ (1) only has one neighbor, and
+ (2) not a smarthome device.
+ then we compare the edges, whether they use the same protocols
+ or not. If yes, then we collapse that node and we attach
+ it to the very first node that uses that set of protocols.
+ Args:
+ G: a complete networkx graph
+ dev_list_file: CSV file path that contains list of device MAC addresses
+ """
+ nodes = G.nodes()
+ #print "Nodes: ", nodes
+ node_to_merge = dict()
+ # Create list of smarthome devices
+ dev_list = create_device_list(DEVICE_MAC_LIST)
+ # Traverse every node
+ # Check that the node is not a smarthome device
+ for node in nodes:
+ neighbors = G.neighbors(node)
+ #print "Neighbors: ", neighbors, "\n"
+ # Skip if the node is a smarthome device
+ if node in dev_list:
+ continue
+ # Skip if the node has many neighbors (non-leaf) or no neighbor at all
+ if len(neighbors) is not 1:
+ continue
+ #print "Node: ", node
+ neighbor = neighbors[0]
+ #print "Neighbor: ", neighbors
+ protocols = G[node][neighbor]['Protocol']
+ #print "Protocol: ", protocols
+ # Store neighbor-protocol as key in dictionary
+ neigh_proto = neighbor + "-" + protocols
+ if neigh_proto not in node_to_merge:
+ node_to_merge[neigh_proto] = node
+ else:
+ # Merge this node if there is already an entry
+ # First delete
+ G.remove_node(node)
+ node_to_merge_with = node_to_merge[neigh_proto]
+ merged_nodes = G.node[node_to_merge_with]['Merged']
+ # Check if this is the first node
+ if merged_nodes is '':
+ merged_nodes = node
+ else:
+ # Put comma if there is already one or more nodes
+ merged_nodes += ", " + node
+ # Then attach as attribute
+ G.node[node_to_merge_with]['Merged'] = merged_nodes
+
+ return G
+
+
+def place_in_graph(G, eth_src, eth_dst, device_dns_mappings, dev_list, layers,
+ edge_to_prot, edge_to_vol):
+ """ Place nodes and edges on the graph
+ Args:
+ G: the complete graph
+ eth_src: MAC address of source
+ eth_dst: MAC address of destination
+ device_dns_mappings: device to DNS mappings (data structure)
+ dev_list: list of existing smarthome devices
+ layers: layers of JSON file structure
+ edge_to_prot: edge to protocols mappings
+ edge_to_vol: edge to traffic volume mappings
+ """
+ # Get timestamp of packet (router's timestamp)
+ timestamp = Decimal(layers[JSON_KEY_FRAME][JSON_KEY_FRAME_TIME_EPOCH])
+ # Get packet length
+ packet_len = Decimal(layers[JSON_KEY_FRAME][JSON_KEY_FRAME_LENGTH])
+ # Get the protocol and strip just the name of it
+ long_protocol = layers[JSON_KEY_FRAME][JSON_KEY_FRAME_PROTOCOLS]
+ # Split once starting from the end of the string and get it
+ split_protocol = long_protocol.split(':')
+ protocol = None
+ if len(split_protocol) < 5:
+ last_index = len(split_protocol) - 1
+ protocol = split_protocol[last_index]
+ else:
+ protocol = split_protocol[3] + ":" + split_protocol[4]
+ print "timestamp: ", timestamp, " - new protocol added: ", protocol, "\n"
+ # Store protocol into the set (source)
+ protocols = None
+ # Key to search in the dictionary is <src-mac-address>-<dst-mac_address>
+ dict_key = eth_src + "-" + eth_dst
+ if dict_key not in edge_to_prot:
+ edge_to_prot[dict_key] = set()
+ protocols = edge_to_prot[dict_key]
+ protocols.add(protocol)
+ protocols_str = ', '.join(protocols)
+ print "protocols: ", protocols_str, "\n"
+ # Check packet length and accumulate to get traffic volume
+ if dict_key not in edge_to_vol:
+ edge_to_vol[dict_key] = 0;
+ edge_to_vol[dict_key] = edge_to_vol[dict_key] + packet_len
+ volume = str(edge_to_vol[dict_key])
+ # And source and destination IPs
+ ip_src = layers[JSON_KEY_IP][JSON_KEY_IP_SRC]
+ ip_dst = layers[JSON_KEY_IP][JSON_KEY_IP_DST]
+ # Categorize source and destination IP addresses: local vs. non-local
+ ip_re = re.compile(r'\b192.168.[0-9.]+')
+ src_is_local = ip_re.search(ip_src)
+ dst_is_local = ip_re.search(ip_dst)
+ print "ip.src =", ip_src, "ip.dst =", ip_dst, "\n"
+ # Place nodes and edges
+ src_node = None
+ dst_node = None
+ if src_is_local:
+ G.add_node(eth_src, Name=dev_list[eth_src])
+ src_node = eth_src
+ else:
+ hostname = None
+ # Check first if the key (eth_dst) exists in the dictionary
+ if eth_dst in device_dns_mappings:
+ # If the source is not local, then it's inbound traffic, and hence the eth_dst is the MAC of the IoT device.
+ hostname = device_dns_mappings[eth_dst].hostname_for_ip_at_time(ip_src, timestamp)
+ if hostname is None:
+ # Use IP if no hostname mapping
+ hostname = ip_src
+ # Non-smarthome devices can be merged later
+ G.add_node(hostname, Merged='')
+ src_node = hostname
+
+ if dst_is_local:
+ G.add_node(eth_dst, Name=dev_list[eth_dst])
+ dst_node = eth_dst
+ else:
+ hostname = None
+ # Check first if the key (eth_dst) exists in the dictionary
+ if eth_src in device_dns_mappings:
+ # If the destination is not local, then it's outbound traffic, and hence the eth_src is the MAC of the IoT device.
+ hostname = device_dns_mappings[eth_src].hostname_for_ip_at_time(ip_dst, timestamp)
+ if hostname is None:
+ # Use IP if no hostname mapping
+ hostname = ip_dst
+ # Non-smarthome devices can be merged later
+ G.add_node(hostname, Merged='')
+ dst_node = hostname
+ G.add_edge(src_node, dst_node, Protocol=protocols_str, Volume=volume)
+