Robust/src/Benchmarks/SingleTM/Bayes/Adtree.java

   1 /* =============================================================================
   2  *
   3  * adtree.java
   4  *
   5  * =============================================================================
   6  *
   7  * Copyright (C) Stanford University, 2006.  All Rights Reserved.
   8  * Author: Chi Cao Minh
   9  * Ported to Java June 2009 Alokika Dash
  10  * University of California, Irvine
  11  *
  12  * =============================================================================
  13  *
  14  * Reference:
  15  *
  16  * A. Moore and M.-S. Lee. Cached sufficient statistics for efficient machine
  17  * learning with large datasets. Journal of Artificial Intelligence Research 8
  18  * (1998), pp 67-91.
  19  *
  20  * =============================================================================
  21  *
  22  * For the license of bayes/sort.h and bayes/sort.c, please see the header
  23  * of the files.
  24  *
  25  * ------------------------------------------------------------------------
  26  *
  27  * For the license of kmeans, please see kmeans/LICENSE.kmeans
  28  *
  29  * ------------------------------------------------------------------------
  30  *
  31  * For the license of ssca2, please see ssca2/COPYRIGHT
  32  *
  33  * ------------------------------------------------------------------------
  34  *
  35  * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
  36  * header of the files.
  37  *
  38  * ------------------------------------------------------------------------
  39  *
  40  * For the license of lib/rbtree.h and lib/rbtree.c, please see
  41  * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
  42  *
  43  * ------------------------------------------------------------------------
  44  *
  45  * Unless otherwise noted, the following license applies to STAMP files:
  46  *
  47  * Copyright (c) 2007, Stanford University
  48  * All rights reserved.
  49  *
  50  * Redistribution and use in source and binary forms, with or without
  51  * modification, are permitted provided that the following conditions are
  52  * met:
  53  *
  54  *     * Redistributions of source code must retain the above copyright
  55  *       notice, this list of conditions and the following disclaimer.
  56  *
  57  *     * Redistributions in binary form must reproduce the above copyright
  58  *       notice, this list of conditions and the following disclaimer in
  59  *       the documentation and/or other materials provided with the
  60  *       distribution.
  61  *
  62  *     * Neither the name of Stanford University nor the names of its
  63  *       contributors may be used to endorse or promote products derived
  64  *       from this software without specific prior written permission.
  65  *
  66  * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
  67  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  68  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  69  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
  70  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  71  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  72  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  73  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  74  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  75  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  76  * THE POSSIBILITY OF SUCH DAMAGE.
  77  *
  78  * =============================================================================
  79  */
  80
  81 public class Adtree {
  82   int numVar;
  83   int numRecord;
  84   AdtreeNode rootNodePtr;
  85
  86   /* =============================================================================
  87    * adtree_alloc
  88    * =============================================================================
  89    */
  90   public Adtree() {
  91     numVar = -1;
  92     numRecord = -1;
  93     rootNodePtr = null;
  94   }
  95
  96
  97   /* =============================================================================
  98    * makeVary
  99    * =============================================================================
 100    */
 101   public AdtreeVary makeVary (int parentIndex, int index, int start, int numRecord, Data dataPtr) {
 102     AdtreeVary varyPtr = new AdtreeVary(index);
 103
 104     if ((parentIndex + 1 != index) && (numRecord > 1)) {
 105       dataPtr.data_sort(start, numRecord, index);
 106     }
 107
 108     int num0 = dataPtr.data_findSplit(start, numRecord, index);
 109     int num1 = numRecord - num0;
 110
 111     int mostCommonValue = ((num0 >= num1) ? 0 : 1);
 112     varyPtr.mostCommonValue = mostCommonValue;
 113
 114     if (num0 == 0 || mostCommonValue == 0) {
 115     } else {
 116       varyPtr.zeroNodePtr = makeNode(index, index, start, num0, dataPtr);
 117       varyPtr.zeroNodePtr.value = 0;
 118     }
 119
 120     if (num1 == 0 || mostCommonValue == 1) {
 121     } else {
 122       varyPtr.oneNodePtr = makeNode(index, index, (start + num0), num1, dataPtr);
 123       varyPtr.oneNodePtr.value = 1;
 124     }
 125
 126     return varyPtr;
 127   }
 128
 129
 130   /* =============================================================================
 131    * makeNode
 132    * =============================================================================
 133    */
 134   public AdtreeNode makeNode (int parentIndex, int index, int start, int numRecord, Data dataPtr) {
 135     int numVar = dataPtr.numVar;
 136     AdtreeNode nodePtr = new AdtreeNode(index, numVar-index-1);
 137     nodePtr.count = numRecord;
 138
 139     AdtreeVary varyVectorPtr[] = nodePtr.varyVectorPtr;
 140     int i=0;
 141
 142     for (int v = (index + 1); v < numVar; v++) {
 143       AdtreeVary varyPtr = makeVary(parentIndex, v, start, numRecord, dataPtr);
 144       varyVectorPtr[i++]=varyPtr;
 145     }
 146
 147     return nodePtr;
 148   }
 149
 150
 151   /* =============================================================================
 152    * adtree_make
 153    * -- Records in dataPtr will get rearranged
 154    * =============================================================================
 155    */
 156   public void adtree_make (Data dataPtr) {
 157     numVar = dataPtr.numVar;
 158     numRecord = dataPtr.numRecord;
 159     dataPtr.data_sort(0, numRecord, 0);
 160     rootNodePtr = makeNode(-1, -1, 0, numRecord, dataPtr);
 161   }
 162
 163
 164   /* =============================================================================
 165    * getCount
 166    * =============================================================================
 167    */
 168   public int
 169     getCount (AdtreeNode nodePtr,
 170         int i,
 171         int q,
 172         Vector_t queryVectorPtr,
 173         int lastQueryIndex)
 174     {
 175       if (nodePtr == null) {
 176         return 0;
 177       }
 178
 179       int nodeIndex = nodePtr.index;
 180       if (nodeIndex >= lastQueryIndex) {
 181         return nodePtr.count;
 182       }
 183
 184       int count = 0;
 185
 186       Query queryPtr = (Query)(queryVectorPtr.vector_at(q));
 187
 188       if (queryPtr == null) {
 189         return nodePtr.count;
 190       }
 191       int queryIndex = queryPtr.index;
 192
 193       AdtreeVary varyPtr = nodePtr.varyVectorPtr[queryIndex - nodeIndex - 1];
 194
 195       int queryValue = queryPtr.value;
 196
 197       if (queryValue == varyPtr.mostCommonValue) {
 198
 199         /*
 200          * We do not explicitly store the counts for the most common value.
 201          * We can calculate it by finding the count of the query without
 202          * the current (superCount) and subtracting the count for the
 203          * query with the current toggled (invertCount).
 204          */
 205         int numQuery = queryVectorPtr.vector_getSize();
 206         Vector_t superQueryVectorPtr = new Vector_t(numQuery - 1);
 207
 208         for (int qq = 0; qq < numQuery; qq++) {
 209           if (qq != q) {
 210             boolean status = superQueryVectorPtr.vector_pushBack(queryVectorPtr.vector_at(qq));
 211           }
 212         }
 213
 214         int superCount = adtree_getCount(superQueryVectorPtr);
 215         superQueryVectorPtr.clear();
 216
 217         int invertCount;
 218         if (queryValue == 0) {
 219           queryPtr.value = 1;
 220           invertCount = getCount(nodePtr,
 221               i,
 222               q,
 223               queryVectorPtr,
 224               lastQueryIndex);
 225           queryPtr.value = 0;
 226         } else {
 227           queryPtr.value = 0;
 228           invertCount = getCount(nodePtr,
 229               i,
 230               q,
 231               queryVectorPtr,
 232               lastQueryIndex);
 233           queryPtr.value = 1;
 234         }
 235         count += superCount - invertCount;
 236
 237       } else {
 238
 239         if (queryValue == 0) {
 240           count += getCount(varyPtr.zeroNodePtr,
 241               (i + 1),
 242               (q + 1),
 243               queryVectorPtr,
 244               lastQueryIndex);
 245         } else if (queryValue == 1) {
 246           count += getCount(varyPtr.oneNodePtr,
 247               (i + 1),
 248               (q + 1),
 249               queryVectorPtr,
 250               lastQueryIndex);
 251         }
 252
 253       }
 254
 255       return count;
 256     }
 257
 258
 259   /* =============================================================================
 260    * adtree_getCount
 261    * -- queryVector must consist of queries sorted by id
 262    * =============================================================================
 263    */
 264   public int
 265     adtree_getCount (Vector_t queryVectorPtr)
 266     {
 267       if (rootNodePtr == null) {
 268         return 0;
 269       }
 270
 271       int lastQueryIndex = -1;
 272       int numQuery = queryVectorPtr.vector_getSize();
 273       if (numQuery > 0) {
 274         Query lastQueryPtr = (Query)(queryVectorPtr.vector_at(numQuery - 1));
 275         lastQueryIndex = lastQueryPtr.index;
 276       }
 277
 278       return getCount(rootNodePtr,
 279           -1,
 280           0,
 281           queryVectorPtr,
 282           lastQueryIndex);
 283     }
 284 }
 285 /* =============================================================================
 286  *
 287  * End of adtree.java
 288  *
 289  * =============================================================================
 290  */