# python_ml/plotting-dbscan.py

   1 from sklearn.cluster import DBSCAN
   2 from sklearn import metrics
   3 import matplotlib.cm as cm
   4 import numpy as np
   5 import matplotlib.pyplot as plt
   6
   7 # Create a subplot with 1 row and 2 columns
   8 fig, (ax2) = plt.subplots(1, 1)
   9 fig.set_size_inches(7, 7)
  10
  11
  12 # Read from file
  13 # TODO: Just change the following path and filename
  14 #       when needed to read from a different file
  15 path = "/scratch/July-2018/Pairs/"
  16 device = "dlink-off"
  17 filename = device + ".txt"
  18
  19 # Number of triggers
  20 trig = 50
  21
  22 # Read and create an array of pairs
  23 with open(path + filename, "r") as pairs:
  24         pairsArr = []
  25         for line in pairs:
  26                 # We will see a pair and we need to split it into xpoint and ypoint
  27                 xpoint, ypoint = line.split(", ")
  28                 pair = [int(xpoint), int(ypoint)]
  29                 pairsArr.append(pair)
  30
  31 # Formed array of pairs
  32 #print(pairsArr)
  33 X = np.array(pairsArr);
  34
  35 # Compute DBSCAN
  36 # eps = distances
  37 # min_samples = minimum number of members of a cluster
  38 db = DBSCAN(eps=10, min_samples=trig - 5).fit(X)
  39 core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
  40 core_samples_mask[db.core_sample_indices_] = True
  41 labels = db.labels_
  42
  43 # Number of clusters in labels, ignoring noise if present.
  44 n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
  45
  46 print('Estimated number of clusters: %d' % n_clusters_)
  47
  48 import matplotlib.pyplot as plt
  49
  50 # Black removed and is used for noise instead.
  51 unique_labels = set(labels)
  52 print("Labels: " + str(labels))
  53
  54 colors = [plt.cm.Spectral(each)
  55           for each in np.linspace(0, 1, len(unique_labels))]
  56 for k, col in zip(unique_labels, colors):
  57     if k == -1:
  58         # Black used for noise.
  59         col = [0, 0, 0, 1]
  60
  61     class_member_mask = (labels == k)
  62
  63     xy = X[class_member_mask & core_samples_mask]
  64     plt.plot(xy[:, 0], xy[:, 1], 'o',
  65              markeredgecolor='k', markersize=14)
  66
  67     xy = X[class_member_mask & ~core_samples_mask]
  68     plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
  69              markeredgecolor='k', markersize=6)
  70
  71 count = 0
  72 for pair in pairsArr:
  73         #if labels[count] != -1:
  74         # If this is not a noise (i.e.,real data)
  75         #       plt.text(pair[0], pair[1], "Freq: " + str(labels.tolist().count(labels[count])), fontsize=10)
  76
  77         plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) +
  78                 "\nFreq: " + str(labels.tolist().count(labels[count])), fontsize=10)
  79         count = count + 1
  80
  81
  82 plt.title(device + ' - Estimated number of clusters: %d' % n_clusters_)
  83 plt.show()
  84
  85