1 /* =============================================================================
5 * =============================================================================
9 * Takes as input a file, containing 1 data point per per line, and performs a
10 * fuzzy c-means clustering on the data. Fuzzy clustering is performed using
11 * min to max clusters and the clustering that gets the best score according to
12 * a compactness and separation criterion are returned.
18 * James Cook University of North Queensland. Australia.
19 * email: mccane@cs.jcu.edu.au
24 * Jay Pisharath, Wei-keng Liao
25 * Northwestern University
32 * University of California, Irvine
34 * =============================================================================
36 * For the license of kmeans, please see kmeans/LICENSE.kmeans
38 * ------------------------------------------------------------------------
40 * Unless otherwise noted, the following license applies to STAMP files:
42 * Copyright (c) 2007, Stanford University
43 * All rights reserved.
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions are
49 * * Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
52 * * Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in
54 * the documentation and/or other materials provided with the
57 * * Neither the name of Stanford University nor the names of its
58 * contributors may be used to endorse or promote products derived
59 * from this software without specific prior written permission.
61 * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
62 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
64 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
66 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
67 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
68 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
69 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
70 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
71 * THE POSSIBILITY OF SUCH DAMAGE.
73 * =============================================================================
75 public class Cluster {
87 * Numbers of loops to perform
92 * Flag to indicate if nloops has been changes
97 * User input for max clusters
102 * User input for min clusters
107 * Using zscore transformation for cluster center
108 * deviating from distribution's mean
110 int use_zscore_transform;
113 * Total number of threads
118 * threshold until which kmeans cluster continues
125 public float[][] feature;
128 * Number of attributes per Object
130 public int nfeatures;
138 * Since zscore transform may perform in cluster() which modifies the
139 * contents of attributes[][], we need to re-store the originals each time
142 public float[][] attributes;
150 * Number of clusters to compute
155 * Flag indicating if nclusters has been changed
157 boolean changeNClusters;
160 * Counter for aggregation of KMeans
167 intwrapper[] new_centers_len;
168 float[][] new_centers;
176 * Counter for finished inner loops
181 * Flag for initialization before setting KMeans
183 boolean isSet4KMeans;
186 * Output: Number of best clusters
191 * Output: Cluster centers
193 float[][] cluster_centres;
195 public Cluster(int nloops,
199 int use_zscore_transform,
204 this.nloops = nloops;
205 this.max_nclusters = max_nclusters;
206 this.min_nclusters = min_nclusters;
207 this.threshold = threshold;
208 this.use_zscore_transform = use_zscore_transform;
209 this.nthreads = nthreads;
210 this.nfeatures = numAttributes;
211 this.npoints = numObjects;
213 this.attributes = new float[this.npoints][this.nfeatures];
214 this.randomPtr = new MyRandom();
215 randomPtr.random_alloc();
217 this.changeNClusters = false;
218 this.counterILoop = 0;
220 this.new_centers_len = null;
221 this.new_centers = null;
222 this.best_nclusters = 0;
223 this.cluster_centres = null;
224 this.isSet4KMeans = false;
230 public boolean initialize() {
231 this.nclusters = this.min_nclusters;
232 this.changeNClusters = false;
233 this.changeNLoops = false;
234 this.counterILoop = 0;
236 this.new_centers_len = null;
237 this.new_centers = null;
239 float[][] attributes = this.attributes;
240 float[][] feature = this.feature;
241 int npoints = this.npoints;
242 int nfeatures = this.nfeatures;
244 * Since zscore transform may perform in cluster() which modifies the
245 * contents of attributes[][], we need to re-store the originals
247 for(int x = 0; x < npoints; x++) {
248 for(int y = 0; y < nfeatures; y++) {
249 attributes[x][y] = feature[x][y];
253 if(this.use_zscore_transform == 1) {
254 zscoreTransform(attributes, npoints, nfeatures);
258 /* =============================================================================
260 * =============================================================================
262 public static float[] extractMoments (float []data,
265 float[] moments = new float[num_moments];
268 for (int i = 0; i < num_elts; i++) {
272 moments[0] = mzero / num_elts;
273 for (int j = 1; j < num_moments; j++) {
275 for (int i = 0; i < num_elts; i++) {
276 moments[j] += (float) Math.pow((data[i]-moments[0]), j+1);
278 moments[j] = moments[j] / num_elts;
283 /* =============================================================================
285 * =============================================================================
287 public static void zscoreTransform (float[][] data, /* in & out: [numObjects][numAttributes] */
291 float[] single_variable = new float[numObjects];
292 for (int i = 0; i < numAttributes; i++) {
293 for (int j = 0; j < numObjects; j++) {
294 single_variable[j] = data[j][i];
296 moments = extractMoments(single_variable, numObjects, 2);
297 moments[1] = (float) Math.sqrt((double)moments[1]);
298 for (int j = 0; j < numObjects; j++) {
299 data[j][i] = (data[j][i]-moments[0])/moments[1];
304 public void resetCounterKMS() {
308 public void incCounterKMS() {
312 public boolean isCounterKMSFull() {
313 return (this.counterKMS == this.nthreads);
316 public void resetCounterILoop() {
317 this.counterILoop = 0;
320 public boolean isILoopFinish() {
321 return !((this.delta > this.threshold) && (this.counterILoop < 500 + 1));
324 public void setBestClusters() {
325 this.best_nclusters = this.nclusters;
328 public void decNLoops() {
329 if(!this.changeNLoops) {
331 this.changeNLoops = true;
335 public boolean checkNLoops() {
336 return (this.nloops > 0);
339 public void resetChangeNLoops() {
340 this.changeNLoops = false;
343 public void incNClusters() {
344 if(!this.changeNClusters) {
346 this.changeNClusters = true;
350 public boolean checkNClusters() {
351 return (this.nclusters <= this.max_nclusters);
354 public void resetChangeNClusters() {
355 this.changeNClusters = false;
358 public void resetIsSet4KMeans() {
359 this.isSet4KMeans = false;
362 public void setKMeans(KMeans kms) {
363 if(!this.isSet4KMeans) {
364 this.randomPtr.random_seed(7);
365 this.new_centers_len = new intwrapper[this.nclusters];
366 this.new_centers = new float[this.nclusters][this.nfeatures];
367 this.cluster_centres = new float[this.nclusters][this.nfeatures];
368 float[][] cluster_centres = this.cluster_centres;
369 float[][] attributes = this.attributes;
370 int npoints = this.npoints;
371 int nclusters = this.nclusters;
372 int nfeatures = this.nfeatures;
373 MyRandom randomPtr = this.randomPtr;
374 /* Randomly pick cluster centers */
375 for (int i = 0; i < nclusters; i++) {
376 int n = (int)(randomPtr.random_generate() % npoints);
377 for (int j = 0; j < nfeatures; j++) {
378 cluster_centres[i][j] = attributes[n][j];
381 this.delta = (float) 0.0;
382 this.isSet4KMeans = true;
385 this.innerKMeansSetting(kms);
388 public void innerKMeansSetting(KMeans kms) {
389 kms.setFeature(this.attributes);
390 kms.setClusters(this.nclusters, this.cluster_centres);
393 public void resetDelta() {
394 this.delta = (float) 0.0;
397 public void mergeKMeans(KMeans kms) {
398 int nclusters = this.nclusters;
399 int nfeatures = this.nfeatures;
400 float[][] new_centers = this.new_centers;
401 float[][] kmsnew_centers = kms.new_centers;
402 intwrapper[] new_centers_len = this.new_centers_len;
403 intwrapper[] kmsnew_centers_len = kms.new_centers_len;
405 this.delta += kms.delta;
406 for (int i = 0; i < nclusters; i++) {
407 for (int j = 0; j < nfeatures; j++) {
408 new_centers[i][j] += kmsnew_centers[i][j];
409 kmsnew_centers[i][j] = (float)0.0; /* set back to 0 */
411 new_centers_len[i] += kmsnew_centers_len[i];
412 kmsnew_centers_len[i] = 0; /* set back to 0 */
415 if(this.isCounterKMSFull()) {
416 // finish one inner loop
419 float[][] cluster_centres = this.cluster_centres;
420 /* Replace old cluster centers with new_centers */
421 for (int i = 0; i < nclusters; i++) {
422 for (int j = 0; j < nfeatures; j++) {
423 if (new_centers_len[i] > 0) {
424 cluster_centres[i][j] = new_centers[i][j] / new_centers_len[i];
426 new_centers[i][j] = (float)0.0; /* set back to 0 */
428 new_centers_len[i] = 0; /* set back to 0 */
431 this.delta /= this.npoints;
435 /* =============================================================================
437 * End of cluster.java
439 * =============================================================================