python_ml/plotting-dbscan-complete.py

   1 from sklearn.cluster import DBSCAN
   2 from sklearn import metrics
   3 import matplotlib.cm as cm
   4 import numpy as np
   5 import matplotlib.pyplot as plt
   6
   7 # Create a subplot with 1 row and 2 columns
   8 fig, (ax2) = plt.subplots(1, 1)
   9 fig.set_size_inches(7, 7)
  10
  11
  12 # Read from file
  13 # TODO: Just change the following path and filename
  14 #       when needed to read from a different file
  15 path = "/scratch/July-2018/Pairs2/"
  16 device1 = "alexa2-on"
  17 device2 = "alexa2-off"
  18 filename1 = device1 + ".txt"
  19 filename2 = device2 + ".txt"
  20
  21 # Number of triggers
  22 trig = 50
  23
  24 # PLOTTING FOR DEVICE ON EVENT
  25 # Read and create an array of pairs
  26 with open(path + filename1, "r") as pairs:
  27         pairsArr = []
  28         for line in pairs:
  29                 # We will see a pair and we need to split it into xpoint and ypoint
  30                 xpoint, ypoint = line.split(", ")
  31                 pair = [int(xpoint), int(ypoint)]
  32                 pairsArr.append(pair)
  33
  34 # Formed array of pairs
  35 #print(pairsArr)
  36 X = np.array(pairsArr);
  37
  38 # Compute DBSCAN
  39 # eps = distances
  40 # min_samples = minimum number of members of a cluster
  41 db = DBSCAN(eps=10, min_samples=trig - 5).fit(X)
  42 core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
  43 core_samples_mask[db.core_sample_indices_] = True
  44 labels = db.labels_
  45
  46 # Number of clusters in labels, ignoring noise if present.
  47 n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
  48 #print('Estimated number of clusters: %d' % n_clusters_)
  49
  50 # Black removed and is used for noise instead.
  51 unique_labels = set(labels)
  52 #print("Labels: " + str(labels))
  53
  54 colors = [plt.cm.Spectral(each)
  55               for each in np.linspace(0, 1, len(unique_labels))]
  56 for k, col in zip(unique_labels, colors):
  57         if k == -1:
  58             # Red used for noise.
  59             col = [1, 0, 0, 1]
  60
  61         class_member_mask = (labels == k)
  62
  63         print("Unique label: " + str(k) + " with freq: " + str(labels.tolist().count(k)))
  64         xy = X[class_member_mask & core_samples_mask]
  65         plt.plot(xy[:, 0], xy[:, 1], 'o',
  66                  markeredgecolor='k', markersize=10)
  67
  68         xy = X[class_member_mask & ~core_samples_mask]
  69         plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
  70                  markeredgecolor='k', markersize=6)
  71
  72 count = 0
  73 for pair in pairsArr:
  74         if labels[count] == -1:
  75                 plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]), fontsize=10)
  76         else:
  77         # Only print the frequency when this is a real cluster
  78                 plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) +
  79                         "\nFreq:" + str(labels.tolist().count(labels[count])), fontsize=10)
  80         count = count + 1
  81
  82 #====================================================================================================
  83
  84 # PLOTTING FOR DEVICE ON EVENT
  85 # Read and create an array of pairs
  86 with open(path + filename2, "r") as pairs:
  87         pairsArr = []
  88         for line in pairs:
  89                 # We will see a pair and we need to split it into xpoint and ypoint
  90                 xpoint, ypoint = line.split(", ")
  91                 pair = [int(xpoint), int(ypoint)]
  92                 pairsArr.append(pair)
  93
  94 # Formed array of pairs
  95 #print(pairsArr)
  96 X = np.array(pairsArr);
  97
  98 # Compute DBSCAN
  99 # eps = distances
 100 # min_samples = minimum number of members of a cluster
 101 db = DBSCAN(eps=10, min_samples=trig - 5).fit(X)
 102 core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
 103 core_samples_mask[db.core_sample_indices_] = True
 104 labels = db.labels_
 105
 106 # Number of clusters in labels, ignoring noise if present.
 107 n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
 108 #print('Estimated number of clusters: %d' % n_clusters_)
 109
 110 import matplotlib.pyplot as plt
 111
 112 # Black removed and is used for noise instead.
 113 unique_labels = set(labels)
 114 #print("Labels: " + str(labels))
 115
 116 colors = [plt.cm.Spectral(each)
 117               for each in np.linspace(0, 1, len(unique_labels))]
 118 for k, col in zip(unique_labels, colors):
 119         if k == -1:
 120             # Green used for noise.
 121             col = [0, 1, 0, 1]
 122
 123         class_member_mask = (labels == k)
 124
 125         print("Unique label: " + str(k) + " with freq: " + str(labels.tolist().count(k)))
 126         xy = X[class_member_mask & core_samples_mask]
 127         plt.plot(xy[:, 0], xy[:, 1], 'o',
 128                  markeredgecolor='k', markersize=10)
 129
 130         xy = X[class_member_mask & ~core_samples_mask]
 131         plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
 132                  markeredgecolor='k', markersize=6)
 133
 134 count = 0
 135 for pair in pairsArr:
 136         if labels[count] == -1:
 137                 plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]), fontsize=10)
 138         else:
 139         # Only print the frequency when this is a real cluster
 140                 plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) +
 141                         "\nFreq:" + str(labels.tolist().count(labels[count])), fontsize=10)
 142         count = count + 1
 143
 144
 145
 146 plt.title(device1 + ' & ' + device2)
 147 plt.show()
 148
 149