From f884d2ea3276e126511cb2bdc8bb9ab0fadad93d Mon Sep 17 00:00:00 2001 From: jzhou Date: Tue, 14 Jul 2009 17:01:50 +0000 Subject: [PATCH] KMeans bench for multi-core version --- .../Benchmarks/Scheduling/KMeans/Cluster.java | 440 ++++++++++++++++++ .../Benchmarks/Scheduling/KMeans/Common.java | 129 +++++ .../Benchmarks/Scheduling/KMeans/KMeans.java | 150 ++++++ .../Scheduling/KMeans/KMeansBench.java | 278 +++++++++++ .../Scheduling/KMeans/LICENSE.kmeans | 36 ++ .../Scheduling/KMeans/MyRandom.java | 102 ++++ .../src/Benchmarks/Scheduling/KMeans/README | 89 ++++ 7 files changed, 1224 insertions(+) create mode 100644 Robust/src/Benchmarks/Scheduling/KMeans/Cluster.java create mode 100644 Robust/src/Benchmarks/Scheduling/KMeans/Common.java create mode 100644 Robust/src/Benchmarks/Scheduling/KMeans/KMeans.java create mode 100644 Robust/src/Benchmarks/Scheduling/KMeans/KMeansBench.java create mode 100644 Robust/src/Benchmarks/Scheduling/KMeans/LICENSE.kmeans create mode 100644 Robust/src/Benchmarks/Scheduling/KMeans/MyRandom.java create mode 100644 Robust/src/Benchmarks/Scheduling/KMeans/README diff --git a/Robust/src/Benchmarks/Scheduling/KMeans/Cluster.java b/Robust/src/Benchmarks/Scheduling/KMeans/Cluster.java new file mode 100644 index 00000000..aa97d35f --- /dev/null +++ b/Robust/src/Benchmarks/Scheduling/KMeans/Cluster.java @@ -0,0 +1,440 @@ +/* ============================================================================= + * + * cluster.java + * + * ============================================================================= + * + * Description: + * + * Takes as input a file, containing 1 data point per per line, and performs a + * fuzzy c-means clustering on the data. Fuzzy clustering is performed using + * min to max clusters and the clustering that gets the best score according to + * a compactness and separation criterion are returned. + * + * + * Author: + * + * Brendan McCane + * James Cook University of North Queensland. Australia. + * email: mccane@cs.jcu.edu.au + * + * + * Edited by: + * + * Jay Pisharath, Wei-keng Liao + * Northwestern University + * + * Chi Cao Minh + * Stanford University + * + * Ported to Java: + * Alokika Dash + * University of California, Irvine + * + * ============================================================================= + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. +* +* ============================================================================= +*/ +public class Cluster { + // Flags + flag startLoop; + flag setKMeans; + flag toendLoop; + flag resetKMeans; + flag finish; + + flag setStartZ; + flag startZ; + + /** + * Numbers of loops to perform + */ + int nloops; + + /* + * Flag to indicate if nloops has been changes + */ + boolean changeNLoops; + + /** + * User input for max clusters + **/ + int max_nclusters; + + /** + * User input for min clusters + **/ + int min_nclusters; + + /** + * Using zscore transformation for cluster center + * deviating from distribution's mean + **/ + int use_zscore_transform; + + /** + * Total number of threads + **/ + int nthreads; + + /** + * threshold until which kmeans cluster continues + **/ + float threshold; + + /** + * List of attributes + **/ + public float[][] feature; + + /** + * Number of attributes per Object + **/ + public int nfeatures; + + /** + * Number of Objects + **/ + public int npoints; + + /** + * Since zscore transform may perform in cluster() which modifies the + * contents of attributes[][], we need to re-store the originals each time + * starting a loop. + */ + public float[][] attributes; + + /** + * + */ + MyRandom randomPtr; + + /* + * Number of clusters to compute + */ + int nclusters; + + /* + * Flag indicating if nclusters has been changed + */ + boolean changeNClusters; + + /* + * Counter for aggregation of KMeans + */ + int counterKMS; + + /* + * + */ + intwrapper[] new_centers_len; + float[][] new_centers; + + /* + * + */ + float delta; + + /* + * Counter for finished inner loops + */ + int counterILoop; + + /* + * Flag for initialization before setting KMeans + */ + boolean isSet4KMeans; + + /** + * Output: Number of best clusters + **/ + int best_nclusters; + + /** + * Output: Cluster centers + **/ + float[][] cluster_centres; + + public Cluster(int nloops, + int max_nclusters, + int min_nclusters, + float threshold, + int use_zscore_transform, + int nthreads, + int numAttributes, + int numObjects, + float[][] buf) { + this.nloops = nloops; + this.max_nclusters = max_nclusters; + this.min_nclusters = min_nclusters; + this.threshold = threshold; + this.use_zscore_transform = use_zscore_transform; + this.nthreads = nthreads; + this.nfeatures = numAttributes; + this.npoints = numObjects; + this.feature = buf; + this.attributes = new float[this.npoints][this.nfeatures]; + this.randomPtr = new MyRandom(); + randomPtr.random_alloc(); + this.nclusters = 0; + this.changeNClusters = false; + this.counterILoop = 0; + this.counterKMS = 0; + this.new_centers_len = null; + this.new_centers = null; + this.best_nclusters = 0; + this.cluster_centres = null; + this.isSet4KMeans = false; + } + + /* + * + */ + public boolean initialize() { + this.nclusters = this.min_nclusters; + this.changeNClusters = false; + this.changeNLoops = false; + this.counterILoop = 0; + this.counterKMS = 0; + this.new_centers_len = null; + this.new_centers = null; + + float[][] attributes = this.attributes; + float[][] feature = this.feature; + int npoints = this.npoints; + int nfeatures = this.nfeatures; + /* + * Since zscore transform may perform in cluster() which modifies the + * contents of attributes[][], we need to re-store the originals + */ + for(int x = 0; x < npoints; x++) { + for(int y = 0; y < nfeatures; y++) { + attributes[x][y] = feature[x][y]; + } + } + + if(this.use_zscore_transform == 1) { + zscoreTransform(attributes, npoints, nfeatures); + } + } + + /* ============================================================================= + * extractMoments + * ============================================================================= + */ + public static float[] extractMoments (float []data, + int num_elts, + int num_moments) { + float[] moments = new float[num_moments]; + + float mzero=0.0f; + for (int i = 0; i < num_elts; i++) { + mzero += data[i]; + } + + moments[0] = mzero / num_elts; + for (int j = 1; j < num_moments; j++) { + moments[j] = 0; + for (int i = 0; i < num_elts; i++) { + moments[j] += (float) Math.pow((data[i]-moments[0]), j+1); + } + moments[j] = moments[j] / num_elts; + } + return moments; + } + + /* ============================================================================= + * zscoreTransform + * ============================================================================= + */ + public static void zscoreTransform (float[][] data, /* in & out: [numObjects][numAttributes] */ + int numObjects, + int numAttributes) { + float[] moments; + float[] single_variable = new float[numObjects]; + for (int i = 0; i < numAttributes; i++) { + for (int j = 0; j < numObjects; j++) { + single_variable[j] = data[j][i]; + } + moments = extractMoments(single_variable, numObjects, 2); + moments[1] = (float) Math.sqrt((double)moments[1]); + for (int j = 0; j < numObjects; j++) { + data[j][i] = (data[j][i]-moments[0])/moments[1]; + } + } + } + + public void resetCounterKMS() { + this.counterKMS = 0; + } + + public void incCounterKMS() { + this.counterKMS++; + } + + public boolean isCounterKMSFull() { + return (this.counterKMS == this.nthreads); + } + + public void resetCounterILoop() { + this.counterILoop = 0; + } + + public boolean isILoopFinish() { + return !((this.delta > this.threshold) && (this.counterILoop < 500 + 1)); + } + + public void setBestClusters() { + this.best_nclusters = this.nclusters; + } + + public void decNLoops() { + if(!this.changeNLoops) { + this.nloops--; + this.changeNLoops = true; + } + } + + public boolean checkNLoops() { + return (this.nloops > 0); + } + + public void resetChangeNLoops() { + this.changeNLoops = false; + } + + public void incNClusters() { + if(!this.changeNClusters) { + this.nclusters++; + this.changeNClusters = true; + } + } + + public boolean checkNClusters() { + return (this.nclusters <= this.max_nclusters); + } + + public void resetChangeNClusters() { + this.changeNClusters = false; + } + + public void resetIsSet4KMeans() { + this.isSet4KMeans = false; + } + + public void setKMeans(KMeans kms) { + if(!this.isSet4KMeans) { + this.randomPtr.random_seed(7); + this.new_centers_len = new intwrapper[this.nclusters]; + this.new_centers = new float[this.nclusters][this.nfeatures]; + this.cluster_centres = new float[this.nclusters][this.nfeatures]; + float[][] cluster_centres = this.cluster_centres; + float[][] attributes = this.attributes; + int npoints = this.npoints; + int nclusters = this.nclusters; + int nfeatures = this.nfeatures; + MyRandom randomPtr = this.randomPtr; + /* Randomly pick cluster centers */ + for (int i = 0; i < nclusters; i++) { + int n = (int)(randomPtr.random_generate() % npoints); + for (int j = 0; j < nfeatures; j++) { + cluster_centres[i][j] = attributes[n][j]; + } + } + this.delta = (float) 0.0; + this.isSet4KMeans = true; + } + + this.innerKMeansSetting(kms); + } + + public void innerKMeansSetting(KMeans kms) { + kms.setFeature(this.attributes); + kms.setClusters(this.nclusters, this.cluster_centres); + } + + public void resetDelta() { + this.delta = (float) 0.0; + } + + public void mergeKMeans(KMeans kms) { + int nclusters = this.nclusters; + int nfeatures = this.nfeatures; + float[][] new_centers = this.new_centers; + float[][] kmsnew_centers = kms.new_centers; + intwrapper[] new_centers_len = this.new_centers_len; + intwrapper[] kmsnew_centers_len = kms.new_centers_len; + + this.delta += kms.delta; + for (int i = 0; i < nclusters; i++) { + for (int j = 0; j < nfeatures; j++) { + new_centers[i][j] += kmsnew_centers[i][j]; + kmsnew_centers[i][j] = (float)0.0; /* set back to 0 */ + } + new_centers_len[i] += kmsnew_centers_len[i]; + kmsnew_centers_len[i] = 0; /* set back to 0 */ + } + + if(this.isCounterKMSFull()) { + // finish one inner loop + this.counterILoop++; + + float[][] cluster_centres = this.cluster_centres; + /* Replace old cluster centers with new_centers */ + for (int i = 0; i < nclusters; i++) { + for (int j = 0; j < nfeatures; j++) { + if (new_centers_len[i] > 0) { + cluster_centres[i][j] = new_centers[i][j] / new_centers_len[i]; + } + new_centers[i][j] = (float)0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + + this.delta /= this.npoints; + } + } +} +/* ============================================================================= + * + * End of cluster.java + * + * ============================================================================= + */ diff --git a/Robust/src/Benchmarks/Scheduling/KMeans/Common.java b/Robust/src/Benchmarks/Scheduling/KMeans/Common.java new file mode 100644 index 00000000..181082ed --- /dev/null +++ b/Robust/src/Benchmarks/Scheduling/KMeans/Common.java @@ -0,0 +1,129 @@ +/* ============================================================================= + * + * common.java + * + * ============================================================================= + * + * For the license of bayes/sort.h and bayes/sort.c, please see the header + * of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * For the license of ssca2, please see ssca2/COPYRIGHT + * + * ------------------------------------------------------------------------ + * + * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the + * header of the files. + * + * ------------------------------------------------------------------------ + * + * For the license of lib/rbtree.h and lib/rbtree.c, please see + * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ============================================================================= + */ + +public class Common { + + public Common() { + } + + + /* ============================================================================= + * common_euclidDist2 + * -- multi-dimensional spatial Euclid distance square + * ============================================================================= + */ + public static float + common_euclidDist2 (float[] pt1, float[] pt2, int numdims) + { + int i; + float ans = 0.0f; + + for (i = 0; i < numdims; i++) { + ans += (pt1[i] - pt2[i]) * (pt1[i] - pt2[i]); + } + + return ans; + } + + + /* ============================================================================= + * common_findNearestPoint + * ============================================================================= + */ + public static int + common_findNearestPoint (float[] pt, /* [nfeatures] */ + int nfeatures, + float[][] pts, /* [npts][nfeatures] */ + int npts) + { + int index = 0; + int i; + //double max_dist = FLT_MAX; + float max_dist = (float)3.40282347e+38; + float limit = (float) 0.99999; + + /* Find the cluster center id with min distance to pt */ + for (i = 0; i < npts; i++) { + float dist = common_euclidDist2(pt, pts[i], nfeatures); /* no need square root */ + if ((dist / max_dist) < limit) { + max_dist = dist; + index = i; + if (max_dist == 0) { + break; + } + } + } + + return index; + } +} + + +/* ============================================================================= + * + * End of common.java + * + * ============================================================================= + */ diff --git a/Robust/src/Benchmarks/Scheduling/KMeans/KMeans.java b/Robust/src/Benchmarks/Scheduling/KMeans/KMeans.java new file mode 100644 index 00000000..f75b9ec1 --- /dev/null +++ b/Robust/src/Benchmarks/Scheduling/KMeans/KMeans.java @@ -0,0 +1,150 @@ +public class KMeans { + // Flags + flag init; + flag run; + flag turnin; + flag reset; + + /** + * thread id + **/ + public int threadid; + + /** + * Total number of threads + **/ + int nthreads; + + // Read-only memebers + /** + * List of attributes + **/ + public float[][] feature; + + /** + * Number of attributes per Object + **/ + public int nfeatures; + + /** + * Number of Objects + **/ + public int npoints; + + /** + * + **/ + public float[][] clusters; + + /** + * Iteration id between min_nclusters to max_nclusters + **/ + public int nclusters; + + // Writable members + /** + * + */ + public float delta; + + /** + * Array that holds change index of cluster center per thread + **/ + public int[] membership; + + // Local copy members, writable + /** + * Number of points in each cluster [nclusters] + **/ + public intwrapper[] new_centers_len; + + /** + * New centers of the clusters [nclusters][nfeatures] + **/ + public float[][] new_centers; + + public KMeans(int threadid, + int nthreads, + int nfeatures, + int npoints, + int nclusters, + float[][] feature, + float[][] clusters) { + this.threadid = threadid; + this.nthreads = nthreads; + this.nfeatures = nfeatures; + this.npoints = npoints; + this.nclusters = nclusters; + this.feature = feature; + this.clusters = clusters; + this.delta = (float)0.0; + this.membership = new int[this.npoints / this.nthreads]; + this.new_centers_len = null; + this.new_centers = null; + } + + public void setFeature(float[][] feature){ + this.feature = feature; + } + + public void setClusters(int nclusters, + float[][] clusters){ + this.nclusters = nclusters; + this.clusters = clusters; + } + + public void init() { + int[] membership = this.membership; + int length = this.membership.length; + for (int i = 0; i < length; i++) { + membership[i] = -1; + } + this.new_centers_len = new intwrapper[this.nclusters]; + this.new_centers = new float[this.nclusters][this.nfeatures]; + } + + public void run() { + float delta = (float) 0.0; + + float[][] feature = this.feature; + int nfeatures = this.nfeatures; + int npoints = this.npoints; + int nclusters = this.nclusters; + int[] membership = this.membership; + float[][] clusters = this.clusters; + intwrapper[] new_centers_len = this.new_centers_len; + float[][] new_centers = this.new_centers; + int index, start, span; + + start = this.threadid; + span = this.nthreads; + + int k = 0; + //System.out.println("myId= " + myId + " start= " + start + " npoints= " + npoints); + for (int i = start; i < npoints; i += span) { + index = Common.common_findNearestPoint(feature[i], + nfeatures, + clusters, + nclusters); + /* + * If membership changes, increase delta by 1. + * membership[i] cannot be changed by other threads + */ + if (membership[k] != index) { + delta += 1.0f; + } + + /* Assign the membership to object i */ + /* membership[i] can't be changed by other thread */ + membership[k] = index; + + /* Update new cluster centers : sum of objects located within */ + new_centers_len[index] = new_centers_len[index] + 1; + for (int j = 0; j < nfeatures; j++) { + new_centers[index][j] = new_centers[index][j] + feature[i][j]; + } + k++; + } + this.delta = delta; + } +} \ No newline at end of file diff --git a/Robust/src/Benchmarks/Scheduling/KMeans/KMeansBench.java b/Robust/src/Benchmarks/Scheduling/KMeans/KMeansBench.java new file mode 100644 index 00000000..33fae7b2 --- /dev/null +++ b/Robust/src/Benchmarks/Scheduling/KMeans/KMeansBench.java @@ -0,0 +1,278 @@ +/** Bamboo version + * + * @author jzhou + * + */ +/* ============================================================================= + * + * kmeans.java + * + * ============================================================================= + * + * Description: + * + * Takes as input a file: + * ascii file: containing 1 data point per line + * binary file: first int is the number of objects + * 2nd int is the no. of features of each object + * + * This example performs a fuzzy c-means clustering on the data. Fuzzy clustering + * is performed using min to max clusters and the clustering that gets the best + * score according to a compactness and separation criterion are returned. + * + * + * Author: + * + * Wei-keng Liao + * ECE Department Northwestern University + * email: wkliao@ece.northwestern.edu + * + * + * Edited by: + * + * Jay Pisharath + * Northwestern University + * + * Chi Cao Minh + * Stanford University + * + * Port to Java version + * Alokika Dash + * University of California, Irvine + * + * ============================================================================= + * + * ------------------------------------------------------------------------ + * + * For the license of kmeans, please see kmeans/LICENSE.kmeans + * + * ------------------------------------------------------------------------ + * + * Unless otherwise noted, the following license applies to STAMP files: + * + * Copyright (c) 2007, Stanford University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of Stanford University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * ============================================================================= + */ + +task t1(StartupObject s{initialstate}) { + //System.printString("task t1\n"); + + // Benchmark settings + int max_nclusters = 40; + int min_nclusters = 40; + float threshold = (float)0.00001; + int use_zscore_transform = 1; + int nthreads = 62; + int nloops = 1; + + // Initialize object data + int numRow = nthreads * 120; + int numDim = 16 * 2; //5; + int numAttributes = numDim; + int numObjects = numRow; // should be times of nthreads + float[][] buf = new float[numObjects][numAttributes]; + // create object info here + long seed = 0; + Random rand = new Random(seed); + if(false) { + // Clustered random using gaussian + /*int numCenter = 16; + int maxInt = (~0x1) + 1; + float[][] centers = new float[numCenter][numDim]; + for(int i = 0; i < numCenter; i++) { + for(int j = 0; j < numDim; j++) { + centers[i][j] = (float)rand.nextInt() / maxInt; + } + } + //float sigma = ((float)1.0 / numCenter) * ((float)1.0 / numCenter) * ((float)1.0 / numCenter); + for(int i = 0; i < numRow; i++) { + //buf[i][0] = numRow; + int ranI = Math.abs(rand.nextInt()) % numCenter; + float[] center = centers[ranI]; + for(int j = 0; j < numDim; j++) { + float noise = (float)rand.nextGaussian(); + buf[i][j] = center[j] + noise; + } + }*/ + } else { + // uniform random + int maxInt = (~0x1) + 1; + for(int i = 0; i < numRow; i++) { + //buf[i][0] = numRow; + for(int j = 0; j < numDim; j++) { + buf[i][j] = (float)rand.nextInt() / maxInt; + //buf[i][j] = (float)(i * 20000 + j * 50000) / maxInt; + } + } + } + + Cluster cluster = new Cluster(nloops, + max_nclusters, + min_nclusters, + threshold, + use_zscore_transform, + nthreads, + numAttributes, + numObjects, + buf) {startLoop}; + + /* Create parallel parts */ + for(int i = 0; i> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt[mti] &= 0xFFFFFFFF; + /* for >32 bit machines */ + } + this.mti=mti; + } + + public void random_seed(int seed) { + init_genrand(seed); + } + + public int random_generate() { + return genrand_int32(); + } + + public int posrandom_generate() { + int r=genrand_int32(); + if (r>0) + return r; + else + return -r; + } + + public int genrand_int32() { + int y; + int mti = this.mti; + + /* mag01[x] = x * MATRIX_A for x=0,1 */ + + if (mti >= 624) { /* generate N words at one time */ + int kk; + int[] mt = this.mt; + + if (mti == 624+1) /* if init_genrand() has not been called, */ + init_genrand(5489); /* a default initial seed is used */ + + for (kk=0;kk<(624-397);kk++) { + y = (mt[kk]&0x80000000)|(mt[kk+1]&0x7fffffff); + mt[kk] = mt[kk+397] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df); + } + for (;kk<(624-1);kk++) { + y = (mt[kk]&0x80000000)|(mt[kk+1]&0x7fffffff); + mt[kk] = mt[kk+(397-624)] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df); + } + y = (mt[624-1]&0x80000000)|(mt[0]&0x7fffffff); + mt[624-1] = mt[397-1] ^ (y >> 1) ^ ((y & 0x1)==0 ? 0:0x9908b0df); + + mti = 0; + } + + y = mt[mti++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680; + y ^= (y << 15) & 0xefc60000; + y ^= (y >> 18); + + this.mti = mti; + + return y; + } +} diff --git a/Robust/src/Benchmarks/Scheduling/KMeans/README b/Robust/src/Benchmarks/Scheduling/KMeans/README new file mode 100644 index 00000000..b25fc24d --- /dev/null +++ b/Robust/src/Benchmarks/Scheduling/KMeans/README @@ -0,0 +1,89 @@ +Introduction +------------ + +This benchmark was taken from NU-MineBench 2.0. Their technical report [3] +provides a description of the algorithm: + + K-means is a partition-based method [4] and is arguably the most + commonly used clustering technique. K-means represents a cluster by the + mean value of all objects contained in it. Given the user-provided + parameter k, the initial k cluster centers are randomly selected from + the database. Then, K-means assigns each object to its nearest cluster + center based on the similarity function. For example, for spatial + clustering, usually the Euclid distance is used to measure the closeness + of two objects. Once the assignments are completed, new centers are + found by finding the mean of all the objects in each cluster. This + process is repeated until two consecutive iterations generate the same + cluster assignment. The clusters produced by the K-means algorithm are + sometimes called "hard" clusters, since any data object either is or is + not a member of a particular cluster. + + +Compiling and Running +--------------------- + +To build the application, simply run: + + make + +By default, this produces an executable named "KMeans.bin", which can then be +run in the following manner: + + ./KMeans.bin -m \ + -n \ + -t \ + -i \ + -nthreads + +To produce the data in [1], the following values were used: + + low contention: -m 40 -n 40 -t 0.05 -i inputs/random-n2048-d16-c16.txt + high contention: -m 15 -n 15 -t 0.05 -i inputs/random-n2048-d16-c16.txt + +For runs without a simulator, a larger input file, (more info below) can be used +instead: + + low contention: -m 40 -n 40 -t 0.00001 -i inputs/random-n65536-d32-c16.txt + high contention: -m 15 -n 15 -t 0.00001 -i inputs/random-n65536-d32-c16.txt + + +Input Files +----------- + +A section of the real image database distributed by Corel Corporation is used +for K-means. This database consists of 17695 scenery pictures. Each picture is +represented by two features: color and edge. The color feature is a vector of 9 +floating points while the edge feature is a vector of size 18. Both K-means +implementations use Euclid distance as the similarity function and execute it +for the two features separately. Since the clustering quality of K-means +methods highly depends on the input parameter k, both K-means were executed +with ten different k values ranging from 4 to 13. + +Smaller versions of "color", "edge", and "texture" were made by taking the +first 100 lines of each file. The smaller inputs are called "color100", +"edge100", and "texture100". + +The randomX_Y files were created by generating random data sets with X points +of Y dimensions each. Values were selected from a uniform distribution. + +More input sets can be generated by using "inputs/generate.py". The script picks +a given number of uniformly distributed random centers and then selects random +points around those centers using a gaussian distribution. There are two +included input files from this script: small, random-r2048-d16-c16.txt; and +large, random-r65536-d32-c16.txt. In the filename, "n" refers to the number of +points, "d" the number of dimensions, and "c" the number of centers. + + +References +---------- + +[1] C. Cao Minh, J. Chung, C. Kozyrakis, and K. Olukotun. STAMP: Stanford + Transactional Applications for Multi-processing. In IISWC '08: Proceedings + of The IEEE International Symposium on Workload Characterization, + September 2008. + +[2] J. Pisharath. NU-MineBench 2.0. Technical Report CUCIS-2005-08-01, 2005. + +[3] J.C. Bezdek. Pattern Recognition with Fuzzy Objective Function Algorithms. + Kluwer Academic Publishers, 1981 + -- 2.34.1