1 /* =============================================================================
5 * =============================================================================
7 * For the license of ssca2, please see ssca2/COPYRIGHT
9 * ------------------------------------------------------------------------
11 * Unless otherwise noted, the following license applies to STAMP files:
13 * Copyright (c) 2007, Stanford University
14 * All rights reserved.
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions are
20 * * Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
23 * * Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in
25 * the documentation and/or other materials provided with the
28 * * Neither the name of Stanford University nor the names of its
29 * contributors may be used to endorse or promote products derived
30 * from this software without specific prior written permission.
32 * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
33 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
35 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
36 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
42 * THE POSSIBILITY OF SUCH DAMAGE.
44 * =============================================================================
47 public class Alg_Radix_Smp {
55 public class Alg_Radix_Smp(int myId, int numThread) {
56 global_myHisto = null;
57 global_psHisto = null;
61 this.numThread = numThread;
64 public int BITS(x, k, j) {
65 int retval = ((x>>k) & ~(~0<<j));
69 /* =============================================================================
72 * R (range) must be a multiple of NODES
73 * q (elems/proc) must be a multiple of NODES
74 * =============================================================================
77 all_countsort_node (int q,
89 myHisto = new int[numThread*R];
90 global_myHisto = myHisto;
91 psHisto = new int[numThread*R];
92 global_psHisto = psHisto;
95 Barrier.enterBarrier();
97 myHisto = global_myHisto;
98 psHisto = global_psHisto;
100 int index = myId * R;
102 for (int k = index; k < index+R; k++) {
106 LocalStartStop lss = new LocalStartStop();
107 CreatePartition.createPartition(0, q, myId, numThread, lss);
109 for (int k = lss.i_start; k < i_stop; k++) {
110 myHisto[(myId * R) + BITS(lKey[k],bitOff,m)]++;
113 Barrier.enterBarrier();
115 CreatePartition.createPartition(0, R, myId, numThread, lss);
118 for (int k = lss.i_start; k < lss.i_stop; k++) {
119 last = psHisto[k] = myHisto[k];
120 for (int j = 1; j < numThread; j++) {
121 int temp = psHisto[(j*R) + k] = last + myHisto[(j*R) + k];
126 Barrier.enterBarrier();
130 for (k = 0; k < R; k++) {
131 myHisto[(myId * R) + k] = (psHisto[(myId * R) + k] - myHisto[(myId * R) + k]) + offset;
132 offset += psHisto[((numThread - 1) * R) + k];
135 Barrier.enterBarrier();
137 CreatePartition.createPartition(0, q, myId, numThread, lss);
139 for (int k = lss.i_start; k < lss.i_stop; k++) {
140 int j = BITS(lKey[k],bitOff,m);
141 lSorted[myHisto[(myId * R) + j]] = lKey[k];
142 myHisto[(myId * R) + j]++;
145 Barrier.enterBarrier();
154 /* =============================================================================
155 * all_countsort_node_aux_seq
157 * R (range) must be a multiple of NODES
158 * q (elems/proc) must be a multiple of NODES
159 * =============================================================================
162 all_countsort_node_aux_seq (int q,
171 int[] myHisto = new int[ R];
172 int[] psHisto = new int[ R];
174 for (int k = 0; k < R; k++) {
178 for (int k = 0; k < q; k++) {
179 myHisto[BITS(lKey[k],bitOff,m)]++;
183 for (int k = 0; k < R; k++) {
184 last = psHisto[k] = myHisto[k];
189 for (int k = 0; k < R; k++) {
190 myHisto[k] = (psHisto[k] - myHisto[k]) + offset;
191 offset += psHisto[k];
194 for (int k = 0; k < q; k++) {
195 int j = BITS(lKey[k], bitOff, m);
196 lSorted[myHisto[j]] = lKey[k];
197 auxSorted[myHisto[j]] = lKey[k];
201 lSorted[mhp[j]] = lKey[k];
202 auxSorted[mhp[j]] = auxKey[k];
212 /* =============================================================================
213 * all_countsort_node_aux
215 * R (range) must be a multiple of NODES
216 * q (elems/proc) must be a multiple of NODES
217 * =============================================================================
220 all_countsort_node_aux (int q,
229 int[] myHisto = null;
230 int[] psHisto = null;
233 myHisto = new int[numThread * R];
234 global_myHisto = myHisto;
235 psHisto = new int[numThread * R];
236 global_psHisto = psHisto;
239 Barrier.enterBarrier();
241 myHisto = global_myHisto;
242 psHisto = global_psHisto;
244 for (int k = 0; k < R; k++) {
245 myHisto[((myId*R) + k)] = 0;
248 LocalStartStop lss = new LocalStartStop();
249 CreatePartition.createPartition(0, q, myId, numThread, lss);
251 for (int k = lss.i_start; k < lss.i_stop; k++) {
252 myHisto[(myId*R) + BITS(lKey[k],bitOff,m)]++;
255 Barrier.enterBarrier();
257 CreatePartition.createPartition(0, R, myId, numThread, lss);
260 for (int k = lss.i_start; k < lss.i_stop; k++) {
261 last = psHisto[k] = myHisto[k];
262 for (int j = 1; j < numThread; j++) {
263 int temp = psHisto[(j*R + k)] = last + myHisto[ (j*R + k)];
268 Barrier.enterBarrier();
272 for (int k = 0; k < R; k++) {
273 myHisto[(myId*R) +k] = (psHisto[ ((myId*R) + k)] - myHisto[ ((myId*R) +k])) + offset;
274 offset += psHisto[((numThread -1) * R) + k];
277 Barrier.enterBarrier();
279 CreatePartition.createPartition(0, q, myId, numThread, lss);
281 for (int k = lss.i_start; k < lss.i_stop; k++) {
282 int j = BITS(lKey[k], bitOff, m);
283 lSorted[myHisto[(myId*R) +j]] = lKey[k];
284 auxSorted[myHisto[(myId*R) +j]] = auxKey[k];
285 myHisto[(myId*R) +j]++;
288 Barrier.enterBarrier();
297 /* =============================================================================
298 * all_radixsort_node_s3
300 * q (elems/proc) must be a multiple of NODES
301 * =============================================================================
304 all_radixsort_node_s3 (int q,
313 global_lTemp = lTemp;
316 Barrier.enterBarrier();
318 lTemp = global_lTemp;
320 all_countsort_node(q, lKeys, lSorted, (1<<11), 0, 11);
321 all_countsort_node(q, lSorted, lTemp, (1<<11), 11, 11);
322 all_countsort_node(q, lTemp, lSorted, (1<<10), 22, 10);
324 Barrier.enterBarrier();
332 /* =============================================================================
333 * all_radixsort_node_s2
335 * q (elems/proc) must be a multiple of NODES
336 * =============================================================================
339 all_radixsort_node_s2 (int q,
348 global_lTemp = lTemp;
351 Barrier.enterBarrier();
353 lTemp = global_lTemp;
355 all_countsort_node(q, lKeys, lTemp, (1<<16), 0, 16);
356 all_countsort_node(q, lTemp, lSorted, (1<<16), 16, 16);
358 Barrier.enterBarrier();
366 /* =============================================================================
367 * all_radixsort_node_aux_s3_seq
369 * q (elems/proc) must be a multiple of NODES
370 * =============================================================================
373 all_radixsort_node_aux_s3_seq (int q,
383 lTemp2 = new int[ q];
385 all_countsort_node_aux_seq(q, lKeys, lSorted, auxKey, auxSorted, (1<<11), 0, 11);
386 all_countsort_node_aux_seq(q, lSorted, lTemp, auxSorted, lTemp2, (1<<11), 11, 11);
387 all_countsort_node_aux_seq(q, lTemp, lSorted, lTemp2, auxSorted, (1<<10), 22, 10);
394 /* =============================================================================
395 * all_radixsort_node_aux_s3
397 * q (elems/proc) must be a multiple of NODES
398 * =============================================================================
401 all_radixsort_node_aux_s3 (int q,
412 global_lTemp = lTemp;
413 lTemp2 = new int[ q];
414 global_lTemp2 = lTemp2;
417 Barrier.enterBarrier();
419 lTemp = global_lTemp;
420 lTemp2 = global_lTemp2;
422 all_countsort_node_aux(q, lKeys, lSorted, auxKey, auxSorted, (1<<11), 0, 11);
423 all_countsort_node_aux(q, lSorted, lTemp, auxSorted, lTemp2, (1<<11), 11, 11);
424 all_countsort_node_aux(q, lTemp, lSorted, lTemp2, auxSorted, (1<<10), 22, 10);
426 Barrier.enterBarrier();
435 /* =============================================================================
437 * End of alg_radix_smp.java
439 * =============================================================================