From c3dfedb1170a2dd0a3ca5627a3da09576240b51f Mon Sep 17 00:00:00 2001 From: rtrimana Date: Mon, 10 Sep 2018 17:05:12 -0700 Subject: [PATCH] Adding a proof of concept for clustering with source and destination (4 dimensional instead of 2 dimensional). --- .../main/java/edu/uci/iotproject/Main.java | 8 +- python_ml/plotting-dbscan-diff.py | 3 +- python_ml/plotting-dbscan-src-dst.py | 94 +++++++++++++++++++ python_ml/plotting-dbscan.py | 5 +- 4 files changed, 103 insertions(+), 7 deletions(-) create mode 100644 python_ml/plotting-dbscan-src-dst.py diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java index a855f47..1d3888a 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java @@ -126,10 +126,10 @@ public class Main { // final String deviceIp = "192.168.1.246"; // .246 == phone; .229 == sprinkler // 13) DLink siren August 14 experiment -// final String inputPcapFile = path + "/2018-08/dlink-siren/dlink-siren.wlan1.local.pcap"; -// final String outputPcapFile = path + "/2018-08/dlink-siren/dlink-siren-processed.pcap"; -// final String triggerTimesFile = path + "/2018-08/dlink-siren/dlink-siren-aug-14-2018.timestamps"; -// final String deviceIp = "192.168.1.246"; // .246 == phone; .183 == siren + final String inputPcapFile = path + "/2018-08/dlink-siren/dlink-siren.wlan1.local.pcap"; + final String outputPcapFile = path + "/2018-08/dlink-siren/dlink-siren-processed.pcap"; + final String triggerTimesFile = path + "/2018-08/dlink-siren/dlink-siren-aug-14-2018.timestamps"; + final String deviceIp = "192.168.1.183"; // .246 == phone; .183 == siren // 14) Nest thermostat August 15 experiment // final String inputPcapFile = path + "/2018-08/nest/nest.wlan1.local.pcap"; diff --git a/python_ml/plotting-dbscan-diff.py b/python_ml/plotting-dbscan-diff.py index ba89a15..15d7728 100644 --- a/python_ml/plotting-dbscan-diff.py +++ b/python_ml/plotting-dbscan-diff.py @@ -65,6 +65,7 @@ unique_labels = set(labels) colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))] for k, col in zip(unique_labels, colors): + cluster_col = [1, 0, 0, 1] if k == -1: # Black used for noise. col = [0, 0, 0, 1] @@ -73,7 +74,7 @@ for k, col in zip(unique_labels, colors): # print("Unique label: " + str(k) + " with freq: " + str(labels.tolist().count(k))) xy = X[class_member_mask & core_samples_mask] - plt.plot(xy[:, 0], xy[:, 1], 'o', + plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(cluster_col), markeredgecolor='k', markersize=10) xy = X[class_member_mask & ~core_samples_mask] diff --git a/python_ml/plotting-dbscan-src-dst.py b/python_ml/plotting-dbscan-src-dst.py new file mode 100644 index 0000000..ff6ceaf --- /dev/null +++ b/python_ml/plotting-dbscan-src-dst.py @@ -0,0 +1,94 @@ +from sklearn.cluster import DBSCAN +from sklearn import metrics +import matplotlib.cm as cm +import numpy as np +import matplotlib.pyplot as plt + +# Create a subplot with 1 row and 2 columns +fig, (ax2) = plt.subplots(1, 1) +fig.set_size_inches(7, 7) + + +# Read from file +# TODO: Just change the following path and filename +# when needed to read from a different file +path = "/scratch/July-2018/Pairs2/" +device = "dlink-siren-off2" +filename = device + ".txt" +plt.ylim(0, 2000) +plt.xlim(0, 2000) + +# Number of triggers +trig = 50 + +# Read and create an array of pairs +with open(path + filename, "r") as pairs: + pairsArr = [] + for line in pairs: + # We will see a pair and we need to split it into xpoint and ypoint + xpoint, ypoint, src, dst = line.split(", ") + pair = [int(xpoint), int(ypoint), int(src), int(dst)] + pairsArr.append(pair) + +# Formed array of pairs +#print(pairsArr) +X = np.array(pairsArr); + +# Compute DBSCAN +# eps = distances +# min_samples = minimum number of members of a cluster +#db = DBSCAN(eps=20, min_samples=trig - 5).fit(X) +# TODO: This is just for seeing more clusters +db = DBSCAN(eps=20, min_samples=trig - 49).fit(X) +core_samples_mask = np.zeros_like(db.labels_, dtype=bool) +core_samples_mask[db.core_sample_indices_] = True +labels = db.labels_ + +# Number of clusters in labels, ignoring noise if present. +n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) + +#print('Estimated number of clusters: %d' % n_clusters_) + +import matplotlib.pyplot as plt + +# Black removed and is used for noise instead. +unique_labels = set(labels) +print("Labels: " + str(labels)) + +colors = [plt.cm.Spectral(each) + for each in np.linspace(0, 1, len(unique_labels))] +for k, col in zip(unique_labels, colors): + cluster_col = [1, 0, 0, 1] + if k == -1: + # Black used for noise. + col = [0, 0, 0, 1] + + class_member_mask = (labels == k) + + xy = X[class_member_mask & core_samples_mask] + plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(cluster_col), + markeredgecolor='k', markersize=10) + + xy = X[class_member_mask & ~core_samples_mask] + plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col), + markeredgecolor='k', markersize=6) + +count = 0 +for pair in pairsArr: + #if labels[count] != -1: + # If this is not a noise (i.e.,real data) + # plt.text(pair[0], pair[1], "Freq: " + str(labels.tolist().count(labels[count])), fontsize=10) + + if labels[count] == -1: + plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]), fontsize=10) + else: + # Only print the frequency when this is a real cluster + plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) + + " f: " + str(labels.tolist().count(labels[count])), fontsize=10) + count = count + 1 + + +plt.title(device + ' - Clusters: %d' % n_clusters_) +plt.show() + + diff --git a/python_ml/plotting-dbscan.py b/python_ml/plotting-dbscan.py index 9b5d1ab..580d4df 100644 --- a/python_ml/plotting-dbscan.py +++ b/python_ml/plotting-dbscan.py @@ -58,6 +58,7 @@ unique_labels = set(labels) colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))] for k, col in zip(unique_labels, colors): + cluster_col = [1, 0, 0, 1] if k == -1: # Black used for noise. col = [0, 0, 0, 1] @@ -65,7 +66,7 @@ for k, col in zip(unique_labels, colors): class_member_mask = (labels == k) xy = X[class_member_mask & core_samples_mask] - plt.plot(xy[:, 0], xy[:, 1], 'o', + plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(cluster_col), markeredgecolor='k', markersize=10) xy = X[class_member_mask & ~core_samples_mask] @@ -83,7 +84,7 @@ for pair in pairsArr: else: # Only print the frequency when this is a real cluster plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) + - " - Freq: " + str(labels.tolist().count(labels[count])), fontsize=10) + " f: " + str(labels.tolist().count(labels[count])), fontsize=10) count = count + 1 -- 2.34.1