Adding plotting using DBSCAN

author rtrimana <rtrimana@uci.edu>

Fri, 31 Aug 2018 23:30:15 +0000 (16:30 -0700)

committer rtrimana <rtrimana@uci.edu>

Fri, 31 Aug 2018 23:30:15 +0000 (16:30 -0700)
author rtrimana <rtrimana@uci.edu>
Fri, 31 Aug 2018 23:30:15 +0000 (16:30 -0700)
committer rtrimana <rtrimana@uci.edu>
Fri, 31 Aug 2018 23:30:15 +0000 (16:30 -0700)
diff --git a/python_ml/plotting-dbscan.py b/python_ml/plotting-dbscan.py

new file mode 100644 (file)

index 0000000..6362b2f
--- /dev/null
+++ b/python_ml/plotting-dbscan.py
@@ -0,0 +1,103 @@
+"""Cluster integer pairs with DBSCAN and plot the result.
+
+Reads one "x, y" integer pair per line from ``path + filename``, clusters the
+points with scikit-learn's DBSCAN, prints the estimated number of clusters and
+the label array, then shows a plot where every point is annotated with its
+coordinates and the number of points that share its label.
+"""
+
+from collections import Counter
+
+from sklearn.cluster import DBSCAN
+from sklearn import metrics
+import matplotlib.cm as cm
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Create a single square figure with one set of axes
+fig, ax = plt.subplots(1, 1)
+fig.set_size_inches(7, 7)
+
+
+# Read from file
+# TODO: Just change the following path and filename
+#      when needed to read from a different file
+path = "/scratch/July-2018/Pairs/"
+device = "dlink-off"
+filename = device + ".txt"
+
+# Number of triggers
+trig = 50
+
+# Read and create an array of pairs
+pairsArr = []
+with open(path + filename, "r") as pairs:
+    for line in pairs:
+        line = line.strip()
+        if not line:
+            # Tolerate blank lines, e.g. a trailing newline at end of file
+            continue
+        # Each line holds a pair; int() ignores surrounding whitespace, so
+        # both "x, y" and "x,y" parse
+        xpoint, ypoint = line.split(",")
+        pairsArr.append([int(xpoint), int(ypoint)])
+
+if not pairsArr:
+    # Fail with a clear message instead of an opaque DBSCAN error
+    raise SystemExit("No pairs found in " + path + filename)
+
+# Formed array of pairs
+X = np.array(pairsArr)
+
+# Compute DBSCAN
+# eps = maximum distance between two points to count as neighbours
+# min_samples = minimum number of members of a cluster
+# NOTE(review): "trig - 5" presumably tolerates a few missed triggers - confirm
+db = DBSCAN(eps=10, min_samples=trig - 5).fit(X)
+labels = db.labels_
+core_samples_mask = np.zeros_like(labels, dtype=bool)
+core_samples_mask[db.core_sample_indices_] = True
+
+# Number of clusters in labels, ignoring noise (label -1) if present.
+n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
+
+print('Estimated number of clusters: %d' % n_clusters_)
+
+unique_labels = set(labels)
+print("Labels: " + str(labels))
+
+# One Spectral colour per label; black is reserved for noise.
+colors = [plt.cm.Spectral(each)
+          for each in np.linspace(0, 1, len(unique_labels))]
+for k, col in zip(unique_labels, colors):
+    if k == -1:
+        # Black used for noise.
+        col = [0, 0, 0, 1]
+
+    class_member_mask = (labels == k)
+
+    # Core samples: large markers filled with the label's colour
+    xy = X[class_member_mask & core_samples_mask]
+    ax.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
+            markeredgecolor='k', markersize=14)
+
+    # Non-core samples: small markers filled with the same colour
+    xy = X[class_member_mask & ~core_samples_mask]
+    ax.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
+            markeredgecolor='k', markersize=6)
+
+# Annotate every point with its coordinates and the number of points sharing
+# its label; counting once up front avoids rescanning all labels per point.
+# To annotate only clustered points, skip entries whose label is -1.
+label_list = labels.tolist()
+label_sizes = Counter(label_list)
+for (xpoint, ypoint), label in zip(pairsArr, label_list):
+    ax.text(xpoint, ypoint,
+            "{}, {}\nFreq: {}".format(xpoint, ypoint, label_sizes[label]),
+            fontsize=10)
+
+
+ax.set_title(device + ' - Estimated number of clusters: %d' % n_clusters_)
+plt.show()
+
+
diff --git a/python_ml/plotting.py b/python_ml/plotting.py

index 8218c80618ccd4849ac4ff0eb82fc2d4013de1c3..0089d021dfc629c1b31e0f25dad689d76826ba54 100644 (file)
--- a/python_ml/plotting.py
+++ b/python_ml/plotting.py
@@ -12,7 +12,7 @@ fig.set_size_inches(7, 7)
  # TODO: Just change the following path and filename 
  #      when needed to read from a different file
  path = "/scratch/July-2018/Pairs/"
  # TODO: Just change the following path and filename 
  #      when needed to read from a different file
  path = "/scratch/July-2018/Pairs/"
-filename = "alexa-off.txt"
+filename = "dlink-off.txt"
  
  # Read and create an array of pairs
  with open(path + filename, "r") as pairs:
  
  # Read and create an array of pairs
  with open(path + filename, "r") as pairs:
@@ -27,14 +27,14 @@ with open(path + filename, "r") as pairs:
  #print(pairsArr)
  X = np.array(pairsArr);
  
  #print(pairsArr)
  X = np.array(pairsArr);
  
-clusters = 25
+clusters = 6
  
  # Plot the data points based on the clusters
  clusterer = KMeans(n_clusters=clusters, random_state=10)
  cluster_labels = clusterer.fit_predict(X)
  # 2nd Plot showing the actual clusters formed
  colors = cm.nipy_spectral(cluster_labels.astype(float) / clusters)
  
  # Plot the data points based on the clusters
  clusterer = KMeans(n_clusters=clusters, random_state=10)
  cluster_labels = clusterer.fit_predict(X)
  # 2nd Plot showing the actual clusters formed
  colors = cm.nipy_spectral(cluster_labels.astype(float) / clusters)
-ax2.scatter(X[:, 0], X[:, 1], marker='o', s=100, lw=0, alpha=0.3,
+ax2.scatter(X[:, 0], X[:, 1], marker='o', s=50, lw=0, alpha=0.3,
              c=colors, edgecolor='k')
  
  # Labeling the clusters
              c=colors, edgecolor='k')
  
  # Labeling the clusters
author	rtrimana <rtrimana@uci.edu>
	Fri, 31 Aug 2018 23:30:15 +0000 (16:30 -0700)
committer	rtrimana <rtrimana@uci.edu>
	Fri, 31 Aug 2018 23:30:15 +0000 (16:30 -0700)
python_ml/plotting-dbscan.py	[new file with mode: 0644]	patch \| blob
python_ml/plotting.py		patch \| blob \| history