Adding a new flow to yield per device data, analysis, and graphs.
author rtrimana <rtrimana@uci.edu>
Tue, 6 Feb 2018 23:40:26 +0000 (15:40 -0800)
committer rtrimana <rtrimana@uci.edu>
Tue, 6 Feb 2018 23:40:26 +0000 (15:40 -0800)
main_flow.sh [new file with mode: 0755]
parser/parse_packet_frequency.py
plot_scripts/plot_ts_graph_wemo [new file with mode: 0644]
run.sh [new file with mode: 0755]

diff --git a/main_flow.sh b/main_flow.sh
new file mode 100755 (executable)
index 0000000..499845b
--- /dev/null
+++ b/main_flow.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+# This is the main script that calls every other script that is needed for the main flow of one device
+if [ "$#" -ne 5 ]
+    then
+        echo "Usage: main_flow.sh <json-file-local> <json-file-internet> <output-file-name> <device-name> <device-mac-address>"
+        echo "       <json-file-local>    = JSON file of local traffic captured on WLAN interfaces"
+        echo "       <json-file-internet> = JSON file of internet traffic captured on ETH interfaces"
+        echo "       <output-file-name>   = base name for the output files"
+        echo "       <device-name>        = device name"
+        echo "       <device-mac-address> = device MAC address"
+        exit 1
+fi
+
+# Create the folder that will hold the output files, i.e. the parent directory of the output base name $3
+mkdir -p "$(dirname "$3")"
+
+# Run the analysis and plot the graphs - the gnuplot script reads hard-coded ./result/wemo_switch_* files
+python ./base_gexf_generator.py "$1" "${3}_local.gexf"
+python ./base_gexf_generator.py "$2" "${3}_internet.gexf"
+python ./parser/parse_packet_frequency.py "$1" "${3}_local" "$4" "$5"
+python ./parser/parse_packet_frequency.py "$2" "${3}_internet" "$4" "$5"
+gnuplot ./plot_scripts/plot_ts_graph_wemo
+
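diff --git a/parser/parse_packet_frequency.py b/parser/parse_packet_frequency.py
index 4d0fea2..a08fcba 100644 (file)
--- a/parser/parse_packet_frequency.py
+++ b/parser/parse_packet_frequency.py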
@@ -21,8 +21,9 @@ JSON_KEY_ETH_DST = "eth.dst"
 JSON_KEY_ETH_SRC = "eth.src"
 JSON_KEY_FRAME = "frame"
 JSON_KEY_FRAME_TIME = "frame.time"
+JSON_KEY_FRAME_LEN = "frame.len"
 TABLE_HEADER_X = "Timestamp (hh:mm:ss)"
-TABLE_HEADER_Y = "Packet frequency (pps)"
+TABLE_HEADER_Y = "Packet frequency"
 INCOMING_APPENDIX = "_incoming"
 OUTGOING_APPENDIX = "_outgoing"
 FILE_APPENDIX = ".dat"
@@ -32,9 +33,10 @@ WINDOW_SIZE = 5
 USE_MOVING_AVERAGE = False
 USE_BINNING = True
 # Range = 6, i.e. 3 to left and 3 to right (in seconds)
-TOTAL_RANGE = 60 # TOTAL_RANGE = 2 x RANGE
-RANGE = 30
-
+#TOTAL_RANGE = 60 # TOTAL_RANGE = 2 x RANGE
+#RANGE = 30
+TOTAL_RANGE = 20 # TOTAL_RANGE = 2 x RANGE
+RANGE = 10
 
 def moving_average(array, window=3):
     """ Calculate moving average
@@ -90,13 +92,13 @@ def seconds_to_hms(t):
         ss = "0" + ss
     return hh + ":" + mm + ":" + ss
     
-def include_timestamps_zero_packets(timefreq):
+def include_timestamps_zero_packets(timelen):
     """ Include every second that has zero packets (no packets/transmission)
         Args:
-            timefreq = dictionary that maps timestamps to number of packets
+            timelen = dictionary that maps timestamps to packet length
     """
     sortedkeylist = []
-    for key in sorted(timefreq):
+    for key in sorted(timelen):
         sortedkeylist.append(key)
     first = sortedkeylist[0]
     last = sortedkeylist[len(sortedkeylist)-1]
@@ -108,10 +110,10 @@ def include_timestamps_zero_packets(timefreq):
     counter = 0
     while counter < seconds:
         timestamp = seconds_to_hms(first_seconds + counter)
-        if timestamp not in timefreq:
-            timefreq[timestamp] = 0
+        if timestamp not in timelen:
+            timelen[timestamp] = 0
         counter += 1
-    return timefreq
+    return timelen
     
 
 def save_to_file(tblheader, dictionary, filenameout):
@@ -205,17 +207,17 @@ def main():
         print "Usage: python", sys.argv[0], "<input_file> <output_file> <device_name> <mac_address>"
         return
     # Parse the file for the specified MAC address
-    timefreq_incoming = parse_json(sys.argv[1], sys.argv[4], True)
-    timefreq_incoming = include_timestamps_zero_packets(timefreq_incoming)
-    timefreq_outgoing = parse_json(sys.argv[1], sys.argv[4], False)
-    timefreq_outgoing = include_timestamps_zero_packets(timefreq_outgoing)
+    timelen_incoming = parse_json(sys.argv[1], sys.argv[4], True)
+    timelen_incoming = include_timestamps_zero_packets(timelen_incoming)
+    timelen_outgoing = parse_json(sys.argv[1], sys.argv[4], False)
+    timelen_outgoing = include_timestamps_zero_packets(timelen_outgoing)
     # Write statistics into file
     print "====================================================================="
     print "==> Analyzing incoming traffic ..."
-    save_to_file(sys.argv[3] + INCOMING_APPENDIX, timefreq_incoming, sys.argv[2] + INCOMING_APPENDIX + FILE_APPENDIX)
+    save_to_file(sys.argv[3] + INCOMING_APPENDIX, timelen_incoming, sys.argv[2] + INCOMING_APPENDIX + FILE_APPENDIX)
     print "====================================================================="
     print "==> Analyzing outgoing traffic ..."
-    save_to_file(sys.argv[3] + OUTGOING_APPENDIX, timefreq_outgoing, sys.argv[2] + OUTGOING_APPENDIX + FILE_APPENDIX)
+    save_to_file(sys.argv[3] + OUTGOING_APPENDIX, timelen_outgoing, sys.argv[2] + OUTGOING_APPENDIX + FILE_APPENDIX)
     print "====================================================================="
     #for time in time_freq.keys():
     #for key in sorted(time_freq):
@@ -232,8 +234,8 @@ def parse_json(filepath, macaddress, incomingoutgoing):
             incomingoutgoing: boolean to define whether we collect incoming or outgoing traffic
                               True = incoming, False = outgoing
     """
-    # Maps timestamps to frequencies of packets
-    timefreq = dict()
+    # Maps timestamps to lengths of packets
+    timelen = dict()
     with open(filepath) as jf:
         # Read JSON.
         # data becomes reference to root JSON object (or in our case json array)
@@ -246,6 +248,8 @@ def parse_json(filepath, macaddress, incomingoutgoing):
             # Get timestamp
             frame = layers.get(JSON_KEY_FRAME, None)
             datetime = frame.get(JSON_KEY_FRAME_TIME, None)
+            # Get frame length
+            length = frame.get(JSON_KEY_FRAME_LEN, None)
             # Get into the Ethernet address part
             eth = layers.get(JSON_KEY_ETH, None)
             # Skip any non DNS traffic
@@ -259,26 +263,26 @@ def parse_json(filepath, macaddress, incomingoutgoing):
             datetimeobj = parser.parse(datetime)
             # Remove the microsecond part
             timestr = str(datetimeobj.time())[:8]
-            print str(timestr) + " - src:" + str(src) + " - dest:" + str(dst)
+            print str(timestr) + " - src:" + str(src) + " - dest:" + str(dst) + " - length: ", length
             # Get and count the traffic for the specified MAC address
             if incomingoutgoing:           
                 if dst == macaddress:
                     # Check if timestamp already exists in the map
                     # If yes, then just increment the frequency value...
-                    if timestr in timefreq:
-                        timefreq[timestr] = timefreq[timestr] + 1
+                    if timestr in timelen:
+                        timelen[timestr] = timelen[timestr] + int(length)
                     else: # If not, then put the value one there
-                        timefreq[timestr] = 1
+                        timelen[timestr] = int(length)
             else:
                 if src == macaddress:
                     # Check if timestamp already exists in the map
                     # If yes, then just increment the frequency value...
-                    if timestr in timefreq:
-                        timefreq[timestr] = timefreq[timestr] + 1
+                    if timestr in timelen:
+                        timelen[timestr] = timelen[timestr] + int(length)
                     else: # If not, then put the value one there
-                        timefreq[timestr] = 1
+                        timelen[timestr] = int(length)
 
-    return timefreq
+    return timelen
 
 
 if __name__ == '__main__':
diff --git a/plot_scripts/plot_ts_graph_wemo b/plot_scripts/plot_ts_graph_wemo
new file mode 100644 (file)
index 0000000..556b543
--- /dev/null
@@ -0,0 +1,42 @@
+# Script to plot time series graphs for network traffic analysis
+# Plots column 2 against the time in column 1 of each ./result/wemo_switch_*.dat file, one PNG per file.
+# by Rahmadi Trimananda (rahmadi.trimananda@uci.edu)
+# Programming Language Research Group @ University of California, Irvine
+# Winter 2018
+
+# ************ #
+# BASIC SETUP  #
+# ************ #
+set terminal pngcairo enhanced font 'Verdana,10'
+set autoscale
+unset key    # no legend: every graph below holds a single curve
+unset log
+unset label
+set xtics 200    # tic step on the x axis; with time data on x the unit is seconds
+set ytics auto
+set xlabel "Packet Timestamp (hh:mm:ss)"
+set ylabel "Packet Size (bytes)"
+set xdata time    # x values are read as times using the timefmt below
+set timefmt "%H:%M:%S"
+set xrange [:]
+set yrange [0:]    # y axis starts at 0, upper bound is left to autoscale
+
+# ***************** #
+# PER DEVICE SETUP  #
+# ***************** #
+# WeMo switch local traffic - each "set output" must precede the plot that is written to that file
+set output './result/wemo_switch_timestamp_local_incoming.png'
+set title "WeMo Switch Incoming Local Traffic"
+plot "./result/wemo_switch_local_incoming.dat" using 1:2 with lines
+set output './result/wemo_switch_timestamp_local_outgoing.png'
+set title "WeMo Switch Outgoing Local Traffic"
+plot "./result/wemo_switch_local_outgoing.dat" using 1:2 with lines
+
+# WeMo switch internet traffic - incoming graph first, then outgoing graph
+set output './result/wemo_switch_timestamp_internet_incoming.png'
+set title "WeMo Switch Incoming Internet Traffic"
+plot "./result/wemo_switch_internet_incoming.dat" using 1:2 with lines
+set output './result/wemo_switch_timestamp_internet_outgoing.png'
+set title "WeMo Switch Outgoing Internet Traffic"
+plot "./result/wemo_switch_internet_outgoing.dat" using 1:2 with lines
+
diff --git a/run.sh b/run.sh
new file mode 100755 (executable)
index 0000000..7465ad9
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+# One main_flow.sh call per device to analyze: <json-local> <json-internet> <output-base> <device-name> <device-mac>.
+# NOTE(review): the same eth1 capture is passed as both the local and the internet input - confirm this is intended.
+./main_flow.sh json/eth1.dump.json json/eth1.dump.json result/wemo_switch WeMo_Switch 94:10:3e:36:60:09
+#./main_flow.sh json/eth1.dump.json json/eth1.dump.json result/google_nexus Google_Nexus 64:bc:0c:43:3f:40