import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
from sklearn.cluster import DBSCAN
# Create one figure holding a single set of axes (a 1 x 1 grid) and make
# it 7 x 7 inches.
fig, ax2 = plt.subplots(nrows=1, ncols=1)
fig.set_size_inches(7, 7)

# TODO: Just change the following path and filename
# when needed to read from a different file
path = "/scratch/July-2018/Pairs/"
# NOTE(review): `device` is not assigned in the visible part of this
# script; presumably it is set elsewhere in the file -- confirm.
filename = device + ".txt"
# Read the input file and build the list of [x, y] integer pairs, one pair
# per non-empty line of the file.
pairsArr = []
with open(path + filename, "r") as pairs:
    for line in pairs:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of crashing in int().
            continue
        # We will see a pair and we need to split it into xpoint and ypoint.
        # Splitting on "," alone still accepts the "x, y" form because int()
        # ignores surrounding whitespace.
        xpoint, ypoint = line.split(",")
        pair = [int(xpoint), int(ypoint)]
        pairsArr.append(pair)

# Formed array of pairs: one [x, y] row per pair read above.
X = np.array(pairsArr)
# Cluster the points with DBSCAN.
# eps = maximum distance between two samples for them to count as neighbours
# min_samples = minimum number of members of a cluster
# NOTE(review): `trig` is not assigned in the visible part of this script;
# presumably it is set elsewhere in the file -- confirm.
db = DBSCAN(eps=10, min_samples=trig - 5).fit(X)

# Boolean mask with True at the index of every core sample.
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

# Cluster label of every point; the code below treats -1 as noise.
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

print('Estimated number of clusters: %d' % n_clusters_)
# Plot every label group: its core samples as large markers and its
# remaining (non-core) samples as small markers.
# Black removed and is used for noise instead.
unique_labels = set(labels)
print("Labels: " + str(labels))

# One colour per label, spread evenly over the Spectral colormap.
colors = [plt.cm.Spectral(each)
          for each in np.linspace(0, 1, len(unique_labels))]
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = [0, 0, 0, 1]

    class_member_mask = (labels == k)

    # Core samples of this label.
    # NOTE(review): no markerfacecolor is passed here, so `col` is not
    # applied to core samples -- confirm that this is intended.
    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o',
             markeredgecolor='k', markersize=14)

    # Non-core samples of this label, filled with the label's colour.
    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
             markeredgecolor='k', markersize=6)
# Annotate every point with its coordinates and with "Freq", the number of
# points that share its label. Noise points (label -1) are annotated too;
# their "Freq" is the number of noise points.
# The per-label counts are computed once up front instead of re-counting
# the whole label list for every point.
label_values, label_counts = np.unique(labels, return_counts=True)
freq_by_label = dict(zip(label_values.tolist(), label_counts.tolist()))
for pair, label in zip(pairsArr, labels.tolist()):
    plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) +
             "\nFreq: " + str(freq_by_label[label]), fontsize=10)

plt.title(device + ' - Estimated number of clusters: %d' % n_clusters_)