# python_ml/plotting-dbscan-diff.py

from collections import Counter

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
from sklearn.cluster import DBSCAN

   7 # Create a subplot with 1 row and 2 columns
   8 fig, (ax2) = plt.subplots(1, 1)
   9 fig.set_size_inches(7, 7)
  10
  11 # Read from file
  12 # TODO: Just change the following path and filename
  13 #       when needed to read from a different file
  14 path = "/scratch/July-2018/Pairs2/"
  15 # TODO: Change the order of the files below to generate
  16 #               the diff plot reversedly
  17 device1 = "dlink-siren-device-off"
  18 device2 = "dlink-siren-device-on"
  19 filename1 = device1 + ".txt"
  20 filename2 = device2 + ".txt"
  21 plt.ylim(0, 2000)
  22 plt.xlim(0, 2000)
  23
  24 # Number of triggers
  25 trig = 50
  26
  27 # PLOTTING FOR DEVICE ON EVENT
  28 # Read and create an array of pairs
  29 with open(path + filename1, "r") as pairs:
  30         pairsArr1 = list()
  31         for line in pairs:
  32                 # We will see a pair and we need to split it into xpoint and ypoint
  33                 xpoint, ypoint = line.split(", ")
  34                 pair = [int(xpoint), int(ypoint)]
  35                 pairsArr1.append(pair)
  36
  37 # PLOTTING FOR DEVICE ON EVENT
  38 # Read and create an array of pairs
  39 with open(path + filename2, "r") as pairs:
  40         pairsArr2 = list()
  41         for line in pairs:
  42                 # We will see a pair and we need to split it into xpoint and ypoint
  43                 xpoint, ypoint = line.split(", ")
  44                 pair = [int(xpoint), int(ypoint)]
  45                 pairsArr2.append(pair)
  46
  47 diff12 = [i for i in pairsArr1 if i not in pairsArr2]
  48
  49 X = np.array(diff12);
  50
  51 # Compute DBSCAN
  52 # eps = distances
  53 # min_samples = minimum number of members of a cluster
  54 db = DBSCAN(eps=10, min_samples=trig - 45).fit(X)
  55 core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
  56 core_samples_mask[db.core_sample_indices_] = True
  57 labels = db.labels_
  58
  59 # Number of clusters in labels, ignoring noise if present.
  60 n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
  61
  62 # Black removed and is used for noise instead.
  63 unique_labels = set(labels)
  64
  65 colors = [plt.cm.Spectral(each)
  66               for each in np.linspace(0, 1, len(unique_labels))]
  67 for k, col in zip(unique_labels, colors):
  68         cluster_col = [1, 0, 0, 1]
  69         if k == -1:
  70             # Black used for noise.
  71             col = [0, 0, 0, 1]
  72
  73         class_member_mask = (labels == k)
  74
  75         # print("Unique label: " + str(k) + " with freq: " + str(labels.tolist().count(k)))
  76         xy = X[class_member_mask & core_samples_mask]
  77         plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(cluster_col),
  78                  markeredgecolor='k', markersize=10)
  79
  80         xy = X[class_member_mask & ~core_samples_mask]
  81         plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
  82                  markeredgecolor='k', markersize=6)
  83
  84 count = 0
  85 for pair in diff12:
  86         if labels[count] == -1:
  87                 plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]), fontsize=10)
  88         else:
  89         # Only print the frequency when this is a real cluster
  90                 plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) +
  91                         " - Freq:" + str(labels.tolist().count(labels[count])), fontsize=10)
  92         count = count + 1
  93
  94 plt.title(device1 + ' - diff - ' + device2)
  95 plt.show()
  96
  97