From 6051aa9f595e878dac357be3cf6a162150194f1c Mon Sep 17 00:00:00 2001
From: rtrimana
Date: Fri, 31 Aug 2018 16:30:15 -0700
Subject: [PATCH] Adding plotting using DBSCAN

---
Review notes (this section is ignored by git am):
 - Restored the line structure of the patch; it had been collapsed onto a
   few very long lines.
 - plotting-dbscan.py: skip blank input lines, fail with a clear message on
   an empty input file, count label frequencies once with Counter instead of
   rescanning the label list per point, drop the duplicated pyplot import
   and fix the subplot comment.
 - The blob id on the new file's index line is the one from the original
   revision and no longer matches the revised content.
 - Whitespace of the context lines in the plotting.py hunks was
   reconstructed; verify it against the tree before applying.

 python_ml/plotting-dbscan.py | 90 ++++++++++++++++++++++++++++++++++++
 python_ml/plotting.py        |  6 +--
 2 files changed, 93 insertions(+), 3 deletions(-)
 create mode 100644 python_ml/plotting-dbscan.py

diff --git a/python_ml/plotting-dbscan.py b/python_ml/plotting-dbscan.py
new file mode 100644
index 0000000..6362b2f
--- /dev/null
+++ b/python_ml/plotting-dbscan.py
@@ -0,0 +1,90 @@
+from collections import Counter
+
+from sklearn.cluster import DBSCAN
+from sklearn import metrics
+import matplotlib.cm as cm
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Create a figure with a single subplot (1 row, 1 column)
+fig, (ax2) = plt.subplots(1, 1)
+fig.set_size_inches(7, 7)
+
+
+# Read from file
+# TODO: Just change the following path and filename
+#       when needed to read from a different file
+path = "/scratch/July-2018/Pairs/"
+device = "dlink-off"
+filename = device + ".txt"
+
+# Number of triggers
+trig = 50
+
+# Read and create an array of pairs
+pairsArr = []
+with open(path + filename, "r") as pairs:
+    for line in pairs:
+        # Skip blank lines (e.g. a trailing newline at the end of the file),
+        # which would otherwise make the unpacking below raise ValueError
+        if not line.strip():
+            continue
+        # Each line holds one pair that we split into xpoint and ypoint
+        xpoint, ypoint = line.split(", ")
+        pairsArr.append([int(xpoint), int(ypoint)])
+
+if not pairsArr:
+    raise SystemExit("No pairs found in " + path + filename)
+
+# Formed array of pairs
+X = np.array(pairsArr)
+
+# Compute DBSCAN
+# eps = maximum distance between two neighboring samples
+# min_samples = minimum number of members of a cluster
+db = DBSCAN(eps=10, min_samples=trig - 5).fit(X)
+core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
+core_samples_mask[db.core_sample_indices_] = True
+labels = db.labels_
+
+# Number of clusters in labels, ignoring noise if present.
+n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
+
+print('Estimated number of clusters: %d' % n_clusters_)
+
+# One Spectral color per label; the noise label (-1) is drawn in black.
+unique_labels = set(labels)
+print("Labels: " + str(labels))
+
+colors = [plt.cm.Spectral(each)
+          for each in np.linspace(0, 1, len(unique_labels))]
+for k, col in zip(unique_labels, colors):
+    if k == -1:
+        # Black used for noise.
+        col = [0, 0, 0, 1]
+
+    class_member_mask = (labels == k)
+
+    # Core samples: large markers. No markerfacecolor is passed here, so
+    # matplotlib picks the face color from its default color cycle.
+    xy = X[class_member_mask & core_samples_mask]
+    plt.plot(xy[:, 0], xy[:, 1], 'o',
+             markeredgecolor='k', markersize=14)
+
+    # Non-core samples (including noise): small markers in the label color
+    xy = X[class_member_mask & ~core_samples_mask]
+    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
+             markeredgecolor='k', markersize=6)
+
+# Count the members of every label once instead of rescanning the whole
+# label list for each point
+label_counts = Counter(labels.tolist())
+
+# Annotate every point with its coordinates and the size of its cluster
+for pair, label in zip(pairsArr, labels.tolist()):
+    plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) +
+             "\nFreq: " + str(label_counts[label]), fontsize=10)
+
+
+plt.title(device + ' - Estimated number of clusters: %d' % n_clusters_)
+plt.show()
diff --git a/python_ml/plotting.py b/python_ml/plotting.py
index 8218c80..0089d02 100644
--- a/python_ml/plotting.py
+++ b/python_ml/plotting.py
@@ -12,7 +12,7 @@ fig.set_size_inches(7, 7)
 # TODO: Just change the following path and filename
 # when needed to read from a different file
 path = "/scratch/July-2018/Pairs/"
-filename = "alexa-off.txt"
+filename = "dlink-off.txt"
 
 # Read and create an array of pairs
 with open(path + filename, "r") as pairs:
@@ -27,14 +27,14 @@ with open(path + filename, "r") as pairs:
 #print(pairsArr)
 X = np.array(pairsArr);
 
-clusters = 25
+clusters = 6
 # Plot the data points based on the clusters
 clusterer = KMeans(n_clusters=clusters, random_state=10)
 cluster_labels = clusterer.fit_predict(X)
 
 # 2nd Plot showing the actual clusters formed
 colors = cm.nipy_spectral(cluster_labels.astype(float) / clusters)
-ax2.scatter(X[:, 0], X[:, 1], marker='o', s=100, lw=0, alpha=0.3,
+ax2.scatter(X[:, 0], X[:, 1], marker='o', s=50, lw=0, alpha=0.3,
             c=colors, edgecolor='k')
 
 # Labeling the clusters
-- 
2.34.1