queso-0.53.0
bd_tree.cpp
Go to the documentation of this file.
1 //----------------------------------------------------------------------
2 // File: bd_tree.cpp
3 // Programmer: David Mount
4 // Description: Basic methods for bd-trees.
5 // Last modified: 01/04/05 (Version 1.0)
6 //----------------------------------------------------------------------
7 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
8 // David Mount. All Rights Reserved.
9 //
10 // This software and related documentation is part of the Approximate
11 // Nearest Neighbor Library (ANN). This software is provided under
12 // the provisions of the Lesser GNU Public License (LGPL). See the
13 // file ../ReadMe.txt for further information.
14 //
15 // The University of Maryland (U.M.) and the authors make no
16 // representations about the suitability or fitness of this software for
17 // any purpose. It is provided "as is" without express or implied
18 // warranty.
19 //----------------------------------------------------------------------
20 // History:
21 // Revision 0.1 03/04/98
22 // Initial release
23 // Revision l.0 04/01/05
24 // Fixed centroid shrink threshold condition to depend on the
25 // dimension.
26 // Moved dump routine to kd_dump.cpp.
27 //----------------------------------------------------------------------
28 
29 #include "bd_tree.h" // bd-tree declarations
30 #include "kd_util.h" // kd-tree utilities
31 #include "kd_split.h" // kd-tree splitting rules
32 
33 #include <ANN/ANNperf.h> // performance evaluation
34 
35 //----------------------------------------------------------------------
36 // Printing a bd-tree
37 // These routines print a bd-tree. See the analogous procedure
38 // in kd_tree.cpp for more information.
39 //----------------------------------------------------------------------
40 
41 void ANNbd_shrink::print( // print shrinking node
42  int level, // depth of node in tree
43  ostream &out) // output stream
44 {
45  child[ANN_OUT]->print(level+1, out); // print out-child
46 
47  out << " ";
48  for (int i = 0; i < level; i++) // print indentation
49  out << "..";
50  out << "Shrink";
51  for (int j = 0; j < n_bnds; j++) { // print sides, 2 per line
52  if (j % 2 == 0) {
53  out << "\n"; // newline and indentation
54  for (int i = 0; i < level+2; i++) out << " ";
55  }
56  out << " ([" << bnds[j].cd << "]"
57  << (bnds[j].sd > 0 ? ">=" : "< ")
58  << bnds[j].cv << ")";
59  }
60  out << "\n";
61 
62  child[ANN_IN]->print(level+1, out); // print in-child
63 }
64 
65 //----------------------------------------------------------------------
66 // kd_tree statistics utility (for performance evaluation)
67 // This routine computes various statistics information for
68 // shrinking nodes. See file kd_tree.cpp for more information.
69 //----------------------------------------------------------------------
70 
71 void ANNbd_shrink::getStats( // get subtree statistics
72  int dim, // dimension of space
73  ANNkdStats &st, // stats (modified)
74  ANNorthRect &bnd_box) // bounding box
75 {
76  ANNkdStats ch_stats; // stats for children
77  ANNorthRect inner_box(dim); // inner box of shrink
78 
79  annBnds2Box(bnd_box, // enclosing box
80  dim, // dimension
81  n_bnds, // number of bounds
82  bnds, // bounds array
83  inner_box); // inner box (modified)
84  // get stats for inner child
85  ch_stats.reset(); // reset
86  child[ANN_IN]->getStats(dim, ch_stats, inner_box);
87  st.merge(ch_stats); // merge them
88  // get stats for outer child
89  ch_stats.reset(); // reset
90  child[ANN_OUT]->getStats(dim, ch_stats, bnd_box);
91  st.merge(ch_stats); // merge them
92 
93  st.depth++; // increment depth
94  st.n_shr++; // increment number of shrinks
95 }
96 
97 //----------------------------------------------------------------------
98 // bd-tree constructor
99 // This is the main constructor for bd-trees given a set of points.
100 // It first builds a skeleton kd-tree as a basis, then computes the
101 // bounding box of the data points, and then invokes rbd_tree() to
102 // actually build the tree, passing it the appropriate splitting
103 // and shrinking information.
104 //----------------------------------------------------------------------
105 
106 ANNkd_ptr rbd_tree( // recursive construction of bd-tree
107  ANNpointArray pa, // point array
108  ANNidxArray pidx, // point indices to store in subtree
109  int n, // number of points
110  int dim, // dimension of space
111  int bsp, // bucket space
112  ANNorthRect &bnd_box, // bounding box for current node
113  ANNkd_splitter splitter, // splitting routine
114  ANNshrinkRule shrink); // shrinking rule
115 
116 ANNbd_tree::ANNbd_tree( // construct from point array
117  ANNpointArray pa, // point array (with at least n pts)
118  int n, // number of points
119  int dd, // dimension
120  int bs, // bucket size
121  ANNsplitRule split, // splitting rule
122  ANNshrinkRule shrink) // shrinking rule
123  : ANNkd_tree(n, dd, bs) // build skeleton base tree
124 {
125  pts = pa; // where the points are
126  if (n == 0) return; // no points--no sweat
127 
128  ANNorthRect bnd_box(dd); // bounding box for points
129  // construct bounding rectangle
130  annEnclRect(pa, pidx, n, dd, bnd_box);
131  // copy to tree structure
132  bnd_box_lo = annCopyPt(dd, bnd_box.lo);
133  bnd_box_hi = annCopyPt(dd, bnd_box.hi);
134 
135  switch (split) { // build by rule
136  case ANN_KD_STD: // standard kd-splitting rule
137  root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, kd_split, shrink);
138  break;
139  case ANN_KD_MIDPT: // midpoint split
140  root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, midpt_split, shrink);
141  break;
142  case ANN_KD_SUGGEST: // best (in our opinion)
143  case ANN_KD_SL_MIDPT: // sliding midpoint split
144  root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, sl_midpt_split, shrink);
145  break;
146  case ANN_KD_FAIR: // fair split
147  root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, fair_split, shrink);
148  break;
149  case ANN_KD_SL_FAIR: // sliding fair split
150  root = rbd_tree(pa, pidx, n, dd, bs,
151  bnd_box, sl_fair_split, shrink);
152  break;
153  default:
154  annError("Illegal splitting method", ANNabort);
155  }
156 }
157 
158 //----------------------------------------------------------------------
159 // Shrinking rules
160 //----------------------------------------------------------------------
161 
162 enum ANNdecomp {SPLIT, SHRINK}; // decomposition methods
163 
164 //----------------------------------------------------------------------
165 // trySimpleShrink - Attempt a simple shrink
166 //
167 // We compute the tight bounding box of the points, and compute
168 // the 2*dim ``gaps'' between the sides of the tight box and the
169 // bounding box. If any of the gaps is large enough relative to
170 // the longest side of the tight bounding box, then we shrink
171 // all sides whose gaps are large enough. (The reason for
172 // comparing against the tight bounding box, is that after
173 // shrinking the longest box size will decrease, and if we use
174 // the standard bounding box, we may decide to shrink twice in
175 // a row. Since the tight box is fixed, we cannot shrink twice
176 // consecutively.)
177 //----------------------------------------------------------------------
178 const float BD_GAP_THRESH = 0.5; // gap threshold (must be < 1)
179 const int BD_CT_THRESH = 2; // min number of shrink sides
180 
181 ANNdecomp trySimpleShrink( // try a simple shrink
182  ANNpointArray pa, // point array
183  ANNidxArray pidx, // point indices to store in subtree
184  int n, // number of points
185  int dim, // dimension of space
186  const ANNorthRect &bnd_box, // current bounding box
187  ANNorthRect &inner_box) // inner box if shrinking (returned)
188 {
189  int i;
190  // compute tight bounding box
191  annEnclRect(pa, pidx, n, dim, inner_box);
192 
193  ANNcoord max_length = 0; // find longest box side
194  for (i = 0; i < dim; i++) {
195  ANNcoord length = inner_box.hi[i] - inner_box.lo[i];
196  if (length > max_length) {
197  max_length = length;
198  }
199  }
200 
201  int shrink_ct = 0; // number of sides we shrunk
202  for (i = 0; i < dim; i++) { // select which sides to shrink
203  // gap between boxes
204  ANNcoord gap_hi = bnd_box.hi[i] - inner_box.hi[i];
205  // big enough gap to shrink?
206  if (gap_hi < max_length*BD_GAP_THRESH)
207  inner_box.hi[i] = bnd_box.hi[i]; // no - expand
208  else shrink_ct++; // yes - shrink this side
209 
210  // repeat for high side
211  ANNcoord gap_lo = inner_box.lo[i] - bnd_box.lo[i];
212  if (gap_lo < max_length*BD_GAP_THRESH)
213  inner_box.lo[i] = bnd_box.lo[i]; // no - expand
214  else shrink_ct++; // yes - shrink this side
215  }
216 
217  if (shrink_ct >= BD_CT_THRESH) // did we shrink enough sides?
218  return SHRINK;
219  else return SPLIT;
220 }
221 
222 //----------------------------------------------------------------------
223 // tryCentroidShrink - Attempt a centroid shrink
224 //
225 // We repeatedly apply the splitting rule, always to the larger subset
226 // of points, until the number of points decreases by the constant
227 // fraction BD_FRACTION. If this takes more than dim*BD_MAX_SPLIT_FAC
228 // splits for this to happen, then we shrink to the final inner box
229 // Otherwise we split.
230 //----------------------------------------------------------------------
231 
232 const float BD_MAX_SPLIT_FAC = 0.5; // maximum number of splits allowed
233 const float BD_FRACTION = 0.5; // ...to reduce points by this fraction
234  // ...This must be < 1.
235 
236 ANNdecomp tryCentroidShrink( // try a centroid shrink
237  ANNpointArray pa, // point array
238  ANNidxArray pidx, // point indices to store in subtree
239  int n, // number of points
240  int dim, // dimension of space
241  const ANNorthRect &bnd_box, // current bounding box
242  ANNkd_splitter splitter, // splitting procedure
243  ANNorthRect &inner_box) // inner box if shrinking (returned)
244 {
245  int n_sub = n; // number of points in subset
246  int n_goal = (int) (n*BD_FRACTION); // number of point in goal
247  int n_splits = 0; // number of splits needed
248  // initialize inner box to bounding box
249  annAssignRect(dim, inner_box, bnd_box);
250 
251  while (n_sub > n_goal) { // keep splitting until goal reached
252  int cd; // cut dim from splitter (ignored)
253  ANNcoord cv; // cut value from splitter (ignored)
254  int n_lo; // number of points on low side
255  // invoke splitting procedure
256  (*splitter)(pa, pidx, inner_box, n_sub, dim, cd, cv, n_lo);
257  n_splits++; // increment split count
258 
259  if (n_lo >= n_sub/2) { // most points on low side
260  inner_box.hi[cd] = cv; // collapse high side
261  n_sub = n_lo; // recurse on lower points
262  }
263  else { // most points on high side
264  inner_box.lo[cd] = cv; // collapse low side
265  pidx += n_lo; // recurse on higher points
266  n_sub -= n_lo;
267  }
268  }
269  if (n_splits > dim*BD_MAX_SPLIT_FAC)// took too many splits
270  return SHRINK; // shrink to final subset
271  else
272  return SPLIT;
273 }
274 
275 //----------------------------------------------------------------------
276 // selectDecomp - select which decomposition to use
277 //----------------------------------------------------------------------
278 
279 ANNdecomp selectDecomp( // select decomposition method
280  ANNpointArray pa, // point array
281  ANNidxArray pidx, // point indices to store in subtree
282  int n, // number of points
283  int dim, // dimension of space
284  const ANNorthRect &bnd_box, // current bounding box
285  ANNkd_splitter splitter, // splitting procedure
286  ANNshrinkRule shrink, // shrinking rule
287  ANNorthRect &inner_box) // inner box if shrinking (returned)
288 {
289  ANNdecomp decomp = SPLIT; // decomposition
290 
291  switch (shrink) { // check shrinking rule
292  case ANN_BD_NONE: // no shrinking allowed
293  decomp = SPLIT;
294  break;
295  case ANN_BD_SUGGEST: // author's suggestion
296  case ANN_BD_SIMPLE: // simple shrink
297  decomp = trySimpleShrink(
298  pa, pidx, // points and indices
299  n, dim, // number of points and dimension
300  bnd_box, // current bounding box
301  inner_box); // inner box if shrinking (returned)
302  break;
303  case ANN_BD_CENTROID: // centroid shrink
304  decomp = tryCentroidShrink(
305  pa, pidx, // points and indices
306  n, dim, // number of points and dimension
307  bnd_box, // current bounding box
308  splitter, // splitting procedure
309  inner_box); // inner box if shrinking (returned)
310  break;
311  default:
312  annError("Illegal shrinking rule", ANNabort);
313  }
314  return decomp;
315 }
316 
317 //----------------------------------------------------------------------
318 // rbd_tree - recursive procedure to build a bd-tree
319 //
320 // This is analogous to rkd_tree, but for bd-trees. See the
321 // procedure rkd_tree() in kd_split.cpp for more information.
322 //
323 // If the number of points falls below the bucket size, then a
324 // leaf node is created for the points. Otherwise we invoke the
325 // procedure selectDecomp() which determines whether we are to
326 // split or shrink. If splitting is chosen, then we essentially
327 // do exactly as rkd_tree() would, and invoke the specified
328 // splitting procedure to the points. Otherwise, the selection
329 // procedure returns a bounding box, from which we extract the
330 // appropriate shrinking bounds, and create a shrinking node.
331 // Finally the points are subdivided, and the procedure is
332 // invoked recursively on the two subsets to form the children.
333 //----------------------------------------------------------------------
334 
335 ANNkd_ptr rbd_tree( // recursive construction of bd-tree
336  ANNpointArray pa, // point array
337  ANNidxArray pidx, // point indices to store in subtree
338  int n, // number of points
339  int dim, // dimension of space
340  int bsp, // bucket space
341  ANNorthRect &bnd_box, // bounding box for current node
342  ANNkd_splitter splitter, // splitting routine
343  ANNshrinkRule shrink) // shrinking rule
344 {
345  ANNdecomp decomp; // decomposition method
346 
347  ANNorthRect inner_box(dim); // inner box (if shrinking)
348 
349  if (n <= bsp) { // n small, make a leaf node
350  if (n == 0) // empty leaf node
351  return KD_TRIVIAL; // return (canonical) empty leaf
352  else // construct the node and return
353  return new ANNkd_leaf(n, pidx);
354  }
355 
356  decomp = selectDecomp( // select decomposition method
357  pa, pidx, // points and indices
358  n, dim, // number of points and dimension
359  bnd_box, // current bounding box
360  splitter, shrink, // splitting/shrinking methods
361  inner_box); // inner box if shrinking (returned)
362 
363  if (decomp == SPLIT) { // split selected
364  int cd; // cutting dimension
365  ANNcoord cv; // cutting value
366  int n_lo; // number on low side of cut
367  // invoke splitting procedure
368  (*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo);
369 
370  ANNcoord lv = bnd_box.lo[cd]; // save bounds for cutting dimension
371  ANNcoord hv = bnd_box.hi[cd];
372 
373  bnd_box.hi[cd] = cv; // modify bounds for left subtree
374  ANNkd_ptr lo = rbd_tree( // build left subtree
375  pa, pidx, n_lo, // ...from pidx[0..n_lo-1]
376  dim, bsp, bnd_box, splitter, shrink);
377  bnd_box.hi[cd] = hv; // restore bounds
378 
379  bnd_box.lo[cd] = cv; // modify bounds for right subtree
380  ANNkd_ptr hi = rbd_tree( // build right subtree
381  pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1]
382  dim, bsp, bnd_box, splitter, shrink);
383  bnd_box.lo[cd] = lv; // restore bounds
384  // create the splitting node
385  return new ANNkd_split(cd, cv, lv, hv, lo, hi);
386  }
387  else { // shrink selected
388  int n_in; // number of points in box
389  int n_bnds; // number of bounding sides
390 
391  annBoxSplit( // split points around inner box
392  pa, // points to split
393  pidx, // point indices
394  n, // number of points
395  dim, // dimension
396  inner_box, // inner box
397  n_in); // number of points inside (returned)
398 
399  ANNkd_ptr in = rbd_tree( // build inner subtree pidx[0..n_in-1]
400  pa, pidx, n_in, dim, bsp, inner_box, splitter, shrink);
401  ANNkd_ptr out = rbd_tree( // build outer subtree pidx[n_in..n]
402  pa, pidx+n_in, n - n_in, dim, bsp, bnd_box, splitter, shrink);
403 
404  ANNorthHSArray bnds = NULL; // bounds (alloc in Box2Bnds and
405  // ...freed in bd_shrink destroyer)
406 
407  annBox2Bnds( // convert inner box to bounds
408  inner_box, // inner box
409  bnd_box, // enclosing box
410  dim, // dimension
411  n_bnds, // number of bounds (returned)
412  bnds); // bounds array (modified)
413 
414  // return shrinking node
415  return new ANNbd_shrink(n_bnds, bnds, in, out);
416  }
417 }
ANNidxArray pidx
Definition: ANN.h:711
void(* ANNkd_splitter)(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_tree.h:72
virtual void getStats(int dim, ANNkdStats &st, ANNorthRect &bnd_box)
Definition: bd_tree.cpp:71
Definition: ANNx.h:48
int depth
Definition: ANNperf.h:54
ANNbd_tree(int n, int dd, int bs=1)
Definition: ANN.h:799
Definition: ANNx.h:46
const float BD_MAX_SPLIT_FAC
Definition: bd_tree.cpp:232
void sl_fair_split(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_split.cpp:346
double ANNcoord
Definition: ANN.h:158
Definition: ANNx.h:46
ANNkd_ptr rbd_tree(ANNpointArray pa, ANNidxArray pidx, int n, int dim, int bsp, ANNorthRect &bnd_box, ANNkd_splitter splitter, ANNshrinkRule shrink)
Definition: bd_tree.cpp:335
ANNdecomp trySimpleShrink(ANNpointArray pa, ANNidxArray pidx, int n, int dim, const ANNorthRect &bnd_box, ANNorthRect &inner_box)
Definition: bd_tree.cpp:181
void annAssignRect(int dim, ANNorthRect &dest, const ANNorthRect &source)
Definition: ANN.cpp:148
ANNpoint bnd_box_lo
Definition: ANN.h:713
ANNkd_ptr root
Definition: ANN.h:712
ANNdecomp
Definition: bd_tree.cpp:162
ANNkd_leaf * KD_TRIVIAL
Definition: kd_tree.cpp:50
void annError(const char *msg, ANNerr level)
Definition: ANN.cpp:169
ANNshrinkRule
Definition: ANN.h:605
ANNpoint bnd_box_hi
Definition: ANN.h:714
ANNpoint * ANNpointArray
Definition: ANN.h:376
void sl_midpt_split(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_split.cpp:146
virtual void print(int level, ostream &out)
Definition: bd_tree.cpp:41
ANNsplitRule split
Definition: ann_test.cpp:491
void reset(int d=0, int n=0, int bs=0)
Definition: ANNperf.h:59
ANNpoint lo
Definition: ANNx.h:93
ANNshrinkRule shrink
Definition: ann_test.cpp:492
int n_shr
Definition: ANNperf.h:53
void annBox2Bnds(const ANNorthRect &inner_box, const ANNorthRect &bnd_box, int dim, int &n_bnds, ANNorthHSArray &bnds)
Definition: kd_util.cpp:384
DLL_API ANNpoint annCopyPt(int dim, ANNpoint source)
Definition: ANN.cpp:140
ANNdecomp selectDecomp(ANNpointArray pa, ANNidxArray pidx, int n, int dim, const ANNorthRect &bnd_box, ANNkd_splitter splitter, ANNshrinkRule shrink, ANNorthRect &inner_box)
Definition: bd_tree.cpp:279
virtual void print(int level, ostream &out)=0
ANNsplitRule
Definition: ANN.h:596
const float BD_FRACTION
Definition: bd_tree.cpp:233
void annEnclRect(ANNpointArray pa, ANNidxArray pidx, int n, int dim, ANNorthRect &bnds)
Definition: kd_util.cpp:73
const float BD_GAP_THRESH
Definition: bd_tree.cpp:178
int n_bnds
Definition: bd_tree.h:63
void annBnds2Box(const ANNorthRect &bnd_box, int dim, int n_bnds, ANNorthHSArray bnds, ANNorthRect &inner_box)
Definition: kd_util.cpp:426
int dim
Definition: ann2fig.cpp:81
const int BD_CT_THRESH
Definition: bd_tree.cpp:179
ANNpoint hi
Definition: ANNx.h:94
void merge(const ANNkdStats &st)
Definition: kd_tree.cpp:133
void midpt_split(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_split.cpp:76
void kd_split(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_split.cpp:44
ANNpointArray pts
Definition: ANN.h:710
void annBoxSplit(ANNpointArray pa, ANNidxArray pidx, int n, int dim, ANNorthRect &box, int &n_in)
Definition: kd_util.cpp:332
ANNkd_ptr child[2]
Definition: bd_tree.h:65
virtual void getStats(int dim, ANNkdStats &st, ANNorthRect &bnd_box)=0
ANNidx * ANNidxArray
Definition: ANN.h:378
ANNorthHSArray bnds
Definition: bd_tree.h:64
ANNdecomp tryCentroidShrink(ANNpointArray pa, ANNidxArray pidx, int n, int dim, const ANNorthRect &bnd_box, ANNkd_splitter splitter, ANNorthRect &inner_box)
Definition: bd_tree.cpp:236
void fair_split(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_split.cpp:243

Generated on Thu Jun 11 2015 13:52:31 for queso-0.53.0 by  doxygen 1.8.5