# python_ml/dlink_clustering.py

   1 from sklearn.cluster import KMeans
   2 import matplotlib.cm as cm
   3 import numpy as np
   4 import matplotlib.pyplot as plt
   5
   6 # Create a subplot with 1 row and 2 columns
   7 fig, (ax2) = plt.subplots(1, 1)
   8 fig.set_size_inches(7, 7)
   9
  10 X = np.array([[132, 192], [117, 960], [117, 962], [1343, 0], [117, 1109], [117, 1110], [117, 1111], [117, 1116], [117, 1117], [117, 1118], [117, 1119], [1015, 0], [117, 966]])
  11 #kmeans = KMeans(n_clusters=5, random_state=0).fit(X)
  12 #print(kmeans.labels_)
  13 #print(kmeans.labels_.tolist().count(3))
  14 clusters = 5
  15
  16 # Plot the data points based on the clusters
  17 clusterer = KMeans(n_clusters=clusters, random_state=10)
  18 cluster_labels = clusterer.fit_predict(X)
  19 # 2nd Plot showing the actual clusters formed
  20 colors = cm.nipy_spectral(cluster_labels.astype(float) / clusters)
  21 ax2.scatter(X[:, 0], X[:, 1], marker='o', s=100, lw=0, alpha=0.3,
  22             c=colors, edgecolor='k')
  23
  24 # Labeling the clusters
  25 centers = clusterer.cluster_centers_
  26 # Label with cluster centers and frequencies
  27 for i, c in enumerate(centers):
  28         mark = '[' + str(int(c[0])) + ', ' + str(int(c[1])) + ']' + ', ' + str(clusterer.labels_.tolist().count(i))
  29         ax2.scatter(c[0], c[1], marker='$%s$' % mark, alpha=1, s=3000, edgecolor='k')
  30
  31 # Draw white circles at cluster centers
  32 #ax2.scatter(centers[:, 0], centers[:, 1], marker='o',
  33 #            c="white", alpha=1, s=200, edgecolor='k')
  34
  35 #for i, c in enumerate(centers):
  36 #    ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1,
  37 #                s=50, edgecolor='k')
  38 #for i, c in enumerate(centers):
  39 #       print(c[0], c[1])
  40
  41 ax2.set_title("The visualization of the clustered data.")
  42 ax2.set_xlabel("Feature space for the 1st feature")
  43 ax2.set_ylabel("Feature space for the 2nd feature")
  44 plt.show()
  45