introduces stall site in front of loop body. later it should be done by the optimizat...
[IRC.git] / Robust / src / Benchmarks / oooJava / kmeans / Normal.java
1 import Barrier;
2 import Common;
3 import GlobalArgs;
4 import Normal;
5 import Random;
6 import System;
7 import intwrapper;
8
9 /* =============================================================================
10  *
11  * normal.java
12  * -- Implementation of normal k-means clustering algorithm
13  *
14  * =============================================================================
15  *
16  * Author:
17  *
18  * Wei-keng Liao
19  * ECE Department, Northwestern University
20  * email: wkliao@ece.northwestern.edu
21  *
22  *
23  * Edited by:
24  *
25  * Jay Pisharath
26  * Northwestern University.
27  *
28  * Chi Cao Minh
29  * Stanford University
30  *
31  * Alokika Dash
32  * University of California, Irvine
33  * Ported to Java
34  *
35  * =============================================================================
36  *
37  * For the license of bayes/sort.h and bayes/sort.c, please see the header
38  * of the files.
39  * 
40  * ------------------------------------------------------------------------
41  * 
42  * For the license of kmeans, please see kmeans/LICENSE.kmeans
43  * 
44  * ------------------------------------------------------------------------
45  * 
46  * For the license of ssca2, please see ssca2/COPYRIGHT
47  * 
48  * ------------------------------------------------------------------------
49  * 
50  * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
51  * header of the files.
52  * 
53  * ------------------------------------------------------------------------
54  * 
55  * For the license of lib/rbtree.h and lib/rbtree.c, please see
56  * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
57  * 
58  * ------------------------------------------------------------------------
59  * 
60  * Unless otherwise noted, the following license applies to STAMP files:
61  * 
62  * Copyright (c) 2007, Stanford University
63  * All rights reserved.
64  * 
65  * Redistribution and use in source and binary forms, with or without
66  * modification, are permitted provided that the following conditions are
67  * met:
68  * 
69  *     * Redistributions of source code must retain the above copyright
70  *       notice, this list of conditions and the following disclaimer.
71  * 
72  *     * Redistributions in binary form must reproduce the above copyright
73  *       notice, this list of conditions and the following disclaimer in
74  *       the documentation and/or other materials provided with the
75  *       distribution.
76  * 
77  *     * Neither the name of Stanford University nor the names of its
78  *       contributors may be used to endorse or promote products derived
79  *       from this software without specific prior written permission.
80  * 
81  * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
82  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
84  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
85  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
86  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
87  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
88  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
89  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
90  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
91  * THE POSSIBILITY OF SUCH DAMAGE.
92  *
93  * =============================================================================
94  */
95
96 public class Normal {
97         int CHUNK;
98
99         public Normal() {
100                 CHUNK = 3;
101         }
102
103         /*
104          * ==========================================================================
105          * === work
106          * ==================================================================
107          * ===========
108          */
109         public static void work(int myId, GlobalArgs args) {
110
111     float[][] feature = args.feature;
112     int nfeatures = args.nfeatures;
113     int npoints = args.npoints;
114     int nclusters = args.nclusters;
115     int[] membership = args.membership;
116     float[][] clusters = args.clusters;
117     int[] new_centers_len = args.new_centers_len;
118     float[][] new_centers = args.new_centers;
119     float delta = 0.0f;
120     int  start, stop;
121     
122     int CHUNK=500;
123     start = myId * CHUNK;
124
125     while (start < npoints) {
126         stop = (((start + CHUNK) < npoints) ? (start + CHUNK) : npoints);
127           
128           sese parallel{
129                 int indexArrayLen=stop-start;                   
130                 int indexArray[]=new int[indexArrayLen];
131                 int pidx=0;
132                  for (int i = start; i < stop; i++) {
133                    int index = Common.common_findNearestPoint(feature[i],
134                             nfeatures,
135                             clusters,
136                             nclusters);
137                    indexArray[pidx]=index;
138                  pidx++;
139                  }
140           }
141           
142           sese serial{
143                   
144                    int sidx=0;
145                    for (int i = start; i < stop; i++) {
146                      
147                             int newIndex=indexArray[sidx];
148                         if (membership[i] != newIndex) {
149                                 delta += 1.0f;
150                         }
151         
152                           membership[i] = newIndex;
153                           new_centers_len[newIndex] = new_centers_len[newIndex] + 1;
154                           
155                           float[] tmpnew_centers=new_centers[newIndex];
156                           float[] tmpfeature=feature[i];
157                           
158                           for (int j = 0; j < nfeatures; j++) {
159                             tmpnew_centers[j] = tmpnew_centers[j] + tmpfeature[j];
160                     }
161                   
162                         sidx++;
163                    }
164
165           }
166
167
168         if (start + CHUNK < npoints) {
169 //      atomic {
170             start = args.global_i;
171             args.global_i = start + CHUNK;
172 //          }
173         } else {
174           break;
175         }
176       }
177     
178
179 /*
180     while (start < npoints) {
181       stop = (((start + CHUNK) < npoints) ? (start + CHUNK) : npoints);
182
183       for (int i = start; i < stop; i++) {
184           
185           sese parallel{
186                 int index = Common.common_findNearestPoint(feature[i],
187                     nfeatures,
188                     clusters,
189                     nclusters);
190           }
191           
192           sese serial{
193                   
194                          // If membership changes, increase delta by 1.
195                          // membership[i] cannot be changed by other threads
196                         if (membership[i] != index) {
197                             delta += 1.0f;
198                         }
199
200                         // Assign the membership to object i 
201                 // membership[i] can't be changed by other thread 
202                 membership[i] = index;
203
204                 // Update new cluster centers : sum of objects located within 
205                   new_centers_len[index] = new_centers_len[index] + 1;
206                   for (int j = 0; j < nfeatures; j++) {
207                     new_centers[index][j] = new_centers[index][j] + feature[i][j];
208                   }
209           }
210           
211       }
212
213       // Update task queue 
214       if (start + CHUNK < npoints) {
215           start = args.global_i;
216           args.global_i = start + CHUNK;
217       } else {
218         break;
219       }
220     }//end of while
221 */
222       args.global_delta = args.global_delta + delta;
223   }
224
225         /*
226          * ==========================================================================
227          * === normal_exec
228          * ==========================================================
229          * ===================
230          */
231         public float[][] normal_exec(int nthreads, float[][] feature, /*
232                                                                                                                                  * in:
233                                                                                                                                  * [npoints][
234                                                                                                                                  * nfeatures]
235                                                                                                                                  */
236         int nfeatures, int npoints, int nclusters, float threshold,
237                         int[] membership, Random randomPtr, /* out: [npoints] */
238                         GlobalArgs args) {
239                 float delta;
240                 float[][] clusters; /* out: [nclusters][nfeatures] */
241
242                 /* Allocate space for returning variable clusters[] */
243                 clusters = new float[nclusters][nfeatures];
244
245                 /* Randomly pick cluster centers */
246                 for (int i = 0; i < nclusters; i++) {
247                         int n = (int) (randomPtr.random_generate() % npoints);
248                         for (int j = 0; j < nfeatures; j++) {
249                                 clusters[i][j] = feature[n][j];
250                         }
251                 }
252
253                 for (int i = 0; i < npoints; i++) {
254                         membership[i] = -1;
255                 }
256
257                 /*
258                  * Need to initialize new_centers_len and new_centers[0] to all 0.
259                  * Allocate clusters on different cache lines to reduce false sharing.
260                  */
261                 int[] new_centers_len = new int[nclusters];
262
263                 float[][] new_centers = new float[nclusters][nfeatures];
264
265                 int loop = 0;
266
267                 long start = System.currentTimeMillis();
268                 do {
269                         delta = 0.0f;
270
271                         args.feature = feature;
272                         args.nfeatures = nfeatures;
273                         args.npoints = npoints;
274                         args.nclusters = nclusters;
275                         args.membership = membership;
276                         args.clusters = clusters;
277                         args.new_centers_len = new_centers_len;
278                         args.new_centers = new_centers;
279
280                         args.global_i = nthreads * CHUNK;
281                         args.global_delta = delta;
282
283                         // Work in parallel with other threads
284                         thread_work(args);
285                         delta = args.global_delta;
286                         
287                         // put stall site here rather than having inside of loop
288             nclusters=clusters.length;
289             int newCenterLen=new_centers_len.length;
290             int new_centersLen=new_centers.length;
291       //
292
293                         /* Replace old cluster centers with new_centers */
294                         for (int i = 0; i < nclusters; i++) {
295                                 for (int j = 0; j < nfeatures; j++) {
296                                         if (new_centers_len[i] > 0) {
297                                                 clusters[i][j] = new_centers[i][j] / new_centers_len[i];
298                                         }
299                                         new_centers[i][j] = (float) 0.0; /* set back to 0 */
300                                 }
301                                 new_centers_len[i] = 0; /* set back to 0 */
302                         }
303
304                         delta /= npoints;
305
306                 } while ((delta > threshold) && (loop++ < 500));
307                 long stop = System.currentTimeMillis();
308                 args.global_time += (stop - start);
309
310                 return clusters;
311         }
312
313         /**
314          * Work done by primary thread in parallel with other threads
315          **/
316         void thread_work(GlobalArgs args) {
317                 Normal.work(0, args); // threadId = 0 because primary thread
318         }
319 }
320
321 /*
322  * =============================================================================
323  * 
324  * End of normal.java
325  * 
326  * =============================================================================
327  */
328
329