Robust/src/Benchmarks/Prefetch/ManualPrefetch/MatrixMultiply/MatrixMultiplyN.java

   1 public class MatrixMultiply extends Thread{
   2     MMul mmul;
   3     public int x0, y0, x1, y1;
   4     public MatrixMultiply(MMul mmul, int x0, int x1, int y0, int y1) {
   5         this.mmul = mmul;
   6         this.x0 = x0;
   7         this.y0 = y0;
   8         this.x1 = x1;
   9         this.y1 = y1;
  10     }
  11
  12     public void run() {
  13         atomic {
  14         short[] offsets = new short[4];
  15         // Prefetch mmul.btranspose[][] matrix
  16         //Get all of B first...we need them first
  17         offsets[0] = getoffset{MMul, btranspose};
  18         offsets[1] = (short) 0;
  19         offsets[2] = (short) y0;
  20         offsets[3] = (short) (y1 - y0 -1);
  21         System.rangePrefetch(mmul, offsets);
  22
  23         //Get first part of A
  24         offsets[0] = getoffset{MMul, a};
  25         offsets[1] = (short) 0;
  26         offsets[2] = (short) x0;
  27         offsets[3] = (short) 15;
  28         System.rangePrefetch(mmul, offsets);
  29
  30         //Get first part of C
  31         offsets[0] = getoffset{MMul, c};
  32         offsets[1] = (short) 0;
  33         System.rangePrefetch(mmul, offsets);
  34         short[] offsets2=new short[2];
  35             double la[][]=mmul.a;
  36             double lc[][]=mmul.c;
  37             double lb[][]=mmul.btranspose;
  38             int M=mmul.M;
  39             int l=8;
  40         //Use btranspose for cache performance
  41             for(int i = x0; i< x1; i++,l++){
  42                 double a[]=la[i];
  43                 double c[]=lc[i];
  44                 if ((l&15)==0) {
  45                     offsets2[0] = (short) (x0+l);
  46                     if ((x0+l+16)>x1) {
  47                         int x=x1-x0-l-1;
  48                         if (x>0) {
  49                             offsets2[1]=(short) x;
  50                             System.rangePrefetch(la, offsets2);
  51                             System.rangePrefetch(lc, offsets2);
  52                         }
  53                     } else {
  54                         offsets2[1] = (short) 15;
  55                         System.rangePrefetch(la, offsets2);
  56                         System.rangePrefetch(lc, offsets2);
  57                     }
  58                 }
  59                 for (int j = y0; j < y1; j++) {
  60                     double innerProduct=0;
  61                     double b[] = lb[j];
  62                     for(int k = 0; k < M; k++) {
  63                         innerProduct += a[k] *b[k];
  64                     }
  65                     c[j]=innerProduct;
  66                 }
  67             }
  68         }
  69     }
  70
  71     public static void main(String[] args) {
  72         int NUM_THREADS = 4;
  73         int SIZE=600;
  74         if (args.length>0) {
  75             NUM_THREADS=Integer.parseInt(args[0]);
  76             if (args.length>1)
  77                 SIZE=Integer.parseInt(args[1]);
  78         }
  79
  80         int[] mid = new int[8];
  81         mid[0] = (128<<24)|(195<<16)|(136<<8)|162; //dc-1.calit2
  82         mid[1] = (128<<24)|(195<<16)|(136<<8)|163; //dc-2.calit2
  83         mid[2] = (128<<24)|(195<<16)|(136<<8)|164; //dc-3.calit2
  84         mid[3] = (128<<24)|(195<<16)|(136<<8)|165; //dc-4.calit2
  85         mid[4] = (128<<24)|(195<<16)|(136<<8)|166; //dc-5.calit2
  86         mid[5] = (128<<24)|(195<<16)|(136<<8)|167; //dc-6.calit2
  87         mid[6] = (128<<24)|(195<<16)|(136<<8)|168; //dc-7.calit2
  88         mid[7] = (128<<24)|(195<<16)|(136<<8)|169; //dc-8.calit2
  89
  90         int p, q, r;
  91         MatrixMultiply[] mm;
  92         MatrixMultiply tmp;
  93         MMul matrix;
  94
  95         atomic {
  96             matrix = global new MMul(SIZE, SIZE, SIZE);
  97             matrix.setValues();
  98             matrix.transpose();
  99             mm = global new MatrixMultiply[NUM_THREADS];
 100             int increment=SIZE/NUM_THREADS;
 101             int base=0;
 102             for(int i=0;i<NUM_THREADS;i++) {
 103                 if ((i+1)==NUM_THREADS)
 104                     mm[i]=global new MatrixMultiply(matrix,base, SIZE, 0, SIZE);
 105                 else
 106                     mm[i]=global new MatrixMultiply(matrix,base, base+increment, 0, SIZE);
 107                 base+=increment;
 108             }
 109             p = matrix.L;
 110             q = matrix.M;
 111             r = matrix.N;
 112         }
 113
 114         // print out the matrices to be multiplied
 115         System.printString("\n");
 116         System.printString("MatrixMultiply: L=");
 117         System.printInt(p);
 118         System.printString("\t");
 119         System.printString("M=");
 120         System.printInt(q);
 121         System.printString("\t");
 122         System.printString("N=");
 123         System.printInt(r);
 124         System.printString("\n");
 125
 126         // start a thread to compute each c[l,n]
 127         for (int i = 0; i < NUM_THREADS; i++) {
 128             atomic {
 129                 tmp = mm[i];
 130             }
 131             tmp.start(mid[i]);
 132         }
 133
 134
 135         // wait for them to finish
 136         for (int i = 0; i < NUM_THREADS; i++) {
 137             atomic {
 138                 tmp = mm[i];
 139             }
 140             tmp.join();
 141         }
 142
 143         // print out the result of the matrix multiply
 144
 145         System.printString("Finished\n");
 146     }
 147 }
 148
 149 public class MMul{
 150
 151         public int L, M, N;
 152         public double[][] a;
 153         public double[][] b;
 154         public double[][] c;
 155         public double[][] btranspose;
 156
 157         public MMul(int L, int M, int N) {
 158                 this.L = L;
 159                 this.M = M;
 160                 this.N = N;
 161                 a = global new double[L][M];
 162                 b = global new double[M][N];
 163                 c = global new double[L][N];
 164                 btranspose = global new double[N][M];
 165         }
 166
 167         public void setValues() {
 168                 for(int i = 0; i < L; i++) {
 169             double ai[] = a[i];
 170                         for(int j = 0; j < M; j++) {
 171                                 ai[j] = j+1;
 172                         }
 173                 }
 174
 175                 for(int i = 0; i < M; i++) {
 176             double bi[] = b[i];
 177                         for(int j = 0; j < N; j++) {
 178                                 bi[j] = j+1;
 179                         }
 180                 }
 181
 182                 for(int i = 0; i < L; i++) {
 183             double ci[] = c[i];
 184                         for(int j = 0; j < N; j++) {
 185                                 ci[j] = 0;
 186                         }
 187                 }
 188                 for(int i = 0; i < N; i++) {
 189             double btransposei[] = btranspose[i];
 190                         for(int j = 0; j < M; j++) {
 191                                 btransposei[j] = 0;
 192                         }
 193                 }
 194         }
 195
 196         public void transpose() {
 197                 for(int row = 0; row < M; row++) {
 198             double brow[] = b[row];
 199                         for(int col = 0; col < N; col++) {
 200                                 btranspose[col][row] = brow[col];
 201                         }
 202                 }
 203         }
 204 }