1 /* =============================================================================
5 * =============================================================================
7 * Copyright (C) Stanford University, 2006. All Rights Reserved.
9 * Ported to Java June 2009 Alokika Dash
10 * University of California, Irvine
12 * =============================================================================
16 * A. Moore and M.-S. Lee. Cached sufficient statistics for efficient machine
17 * learning with large datasets. Journal of Artificial Intelligence Research 8
20 * =============================================================================
22 * For the license of bayes/sort.h and bayes/sort.c, please see the header
25 * ------------------------------------------------------------------------
27 * For the license of kmeans, please see kmeans/LICENSE.kmeans
29 * ------------------------------------------------------------------------
31 * For the license of ssca2, please see ssca2/COPYRIGHT
33 * ------------------------------------------------------------------------
35 * For the license of lib/mt19937ar.c and lib/mt19937ar.h, please see the
36 * header of the files.
38 * ------------------------------------------------------------------------
40 * For the license of lib/rbtree.h and lib/rbtree.c, please see
41 * lib/LEGALNOTICE.rbtree and lib/LICENSE.rbtree
43 * ------------------------------------------------------------------------
45 * Unless otherwise noted, the following license applies to STAMP files:
47 * Copyright (c) 2007, Stanford University
48 * All rights reserved.
50 * Redistribution and use in source and binary forms, with or without
51 * modification, are permitted provided that the following conditions are
54 * * Redistributions of source code must retain the above copyright
55 * notice, this list of conditions and the following disclaimer.
57 * * Redistributions in binary form must reproduce the above copyright
58 * notice, this list of conditions and the following disclaimer in
59 * the documentation and/or other materials provided with the
62 * * Neither the name of Stanford University nor the names of its
63 * contributors may be used to endorse or promote products derived
64 * from this software without specific prior written permission.
66 * THIS SOFTWARE IS PROVIDED BY STANFORD UNIVERSITY ``AS IS'' AND ANY
67 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
68 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
69 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL STANFORD UNIVERSITY BE LIABLE
70 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
71 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
72 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
73 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
74 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
75 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
76 * THE POSSIBILITY OF SUCH DAMAGE.
78 * =============================================================================
84 AdtreeNode rootNodePtr;
86 /* =============================================================================
88 * =============================================================================
97 /* =============================================================================
99 * =============================================================================
101 public AdtreeVary makeVary (int parentIndex, int index, int start, int numRecord, Data dataPtr) {
102 AdtreeVary varyPtr = new AdtreeVary(index);
104 if ((parentIndex + 1 != index) && (numRecord > 1)) {
105 dataPtr.data_sort(start, numRecord, index);
108 int num0 = dataPtr.data_findSplit(start, numRecord, index);
109 int num1 = numRecord - num0;
111 int mostCommonValue = ((num0 >= num1) ? 0 : 1);
112 varyPtr.mostCommonValue = mostCommonValue;
114 if (num0 == 0 || mostCommonValue == 0) {
116 varyPtr.zeroNodePtr = makeNode(index, index, start, num0, dataPtr);
117 varyPtr.zeroNodePtr.value = 0;
120 if (num1 == 0 || mostCommonValue == 1) {
122 varyPtr.oneNodePtr = makeNode(index, index, (start + num0), num1, dataPtr);
123 varyPtr.oneNodePtr.value = 1;
130 /* =============================================================================
132 * =============================================================================
134 public AdtreeNode makeNode (int parentIndex, int index, int start, int numRecord, Data dataPtr) {
135 int numVar = dataPtr.numVar;
136 AdtreeNode nodePtr = new AdtreeNode(index, numVar-index-1);
137 nodePtr.count = numRecord;
139 AdtreeVary varyVectorPtr[] = nodePtr.varyVectorPtr;
142 for (int v = (index + 1); v < numVar; v++) {
143 AdtreeVary varyPtr = makeVary(parentIndex, v, start, numRecord, dataPtr);
144 varyVectorPtr[i++]=varyPtr;
151 /* =============================================================================
153 * -- Records in dataPtr will get rearranged
154 * =============================================================================
156 public void adtree_make (Data dataPtr) {
157 numVar = dataPtr.numVar;
158 numRecord = dataPtr.numRecord;
159 dataPtr.data_sort(0, numRecord, 0);
160 rootNodePtr = makeNode(-1, -1, 0, numRecord, dataPtr);
164 /* =============================================================================
166 * =============================================================================
169 getCount (AdtreeNode nodePtr,
172 Vector_t queryVectorPtr,
175 if (nodePtr == null) {
179 int nodeIndex = nodePtr.index;
180 if (nodeIndex >= lastQueryIndex) {
181 return nodePtr.count;
186 Query queryPtr = (Query)(queryVectorPtr.vector_at(q));
188 if (queryPtr == null) {
189 return nodePtr.count;
191 int queryIndex = queryPtr.index;
193 AdtreeVary varyPtr = nodePtr.varyVectorPtr[queryIndex - nodeIndex - 1];
195 int queryValue = queryPtr.value;
197 if (queryValue == varyPtr.mostCommonValue) {
200 * We do not explicitly store the counts for the most common value.
201 * We can calculate it by finding the count of the query without
202 * the current (superCount) and subtracting the count for the
203 * query with the current toggled (invertCount).
205 int numQuery = queryVectorPtr.vector_getSize();
206 Vector_t superQueryVectorPtr = new Vector_t(numQuery - 1);
208 for (int qq = 0; qq < numQuery; qq++) {
210 boolean status = superQueryVectorPtr.vector_pushBack(queryVectorPtr.vector_at(qq));
214 int superCount = adtree_getCount(superQueryVectorPtr);
215 superQueryVectorPtr.clear();
218 if (queryValue == 0) {
220 invertCount = getCount(nodePtr,
228 invertCount = getCount(nodePtr,
235 count += superCount - invertCount;
239 if (queryValue == 0) {
240 count += getCount(varyPtr.zeroNodePtr,
245 } else if (queryValue == 1) {
246 count += getCount(varyPtr.oneNodePtr,
259 /* =============================================================================
261 * -- queryVector must consist of queries sorted by id
262 * =============================================================================
265 adtree_getCount (Vector_t queryVectorPtr)
267 if (rootNodePtr == null) {
271 int lastQueryIndex = -1;
272 int numQuery = queryVectorPtr.vector_getSize();
274 Query lastQueryPtr = (Query)(queryVectorPtr.vector_at(numQuery - 1));
275 lastQueryIndex = lastQueryPtr.index;
278 return getCount(rootNodePtr,
285 /* =============================================================================
289 * =============================================================================