queso-0.53.0
kd_tree.cpp
Go to the documentation of this file.
1 //----------------------------------------------------------------------
2 // File: kd_tree.cpp
3 // Programmer: Sunil Arya and David Mount
4 // Description: Basic methods for kd-trees.
5 // Last modified: 01/04/05 (Version 1.0)
6 //----------------------------------------------------------------------
7 // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
8 // David Mount. All Rights Reserved.
9 //
10 // This software and related documentation is part of the Approximate
11 // Nearest Neighbor Library (ANN). This software is provided under
12 // the provisions of the Lesser GNU Public License (LGPL). See the
13 // file ../ReadMe.txt for further information.
14 //
15 // The University of Maryland (U.M.) and the authors make no
16 // representations about the suitability or fitness of this software for
17 // any purpose. It is provided "as is" without express or implied
18 // warranty.
19 //----------------------------------------------------------------------
20 // History:
21 // Revision 0.1 03/04/98
22 // Initial release
23 // Revision 1.0 04/01/05
24 // Increased aspect ratio bound (ANN_AR_TOOBIG) from 100 to 1000.
25 // Fixed leaf counts to count trivial leaves.
26 // Added optional pa, pi arguments to Skeleton kd_tree constructor
27 // for use in load constructor.
28 // Added annClose() to eliminate KD_TRIVIAL memory leak.
29 //----------------------------------------------------------------------
30 
31 #include "kd_tree.h" // kd-tree declarations
32 #include "kd_split.h" // kd-tree splitting rules
33 #include "kd_util.h" // kd-tree utilities
34 #include <ANN/ANNperf.h> // performance evaluation
35 
36 //----------------------------------------------------------------------
37 // Global data
38 //
39 // For some splitting rules, especially with small bucket sizes,
40 // it is possible to generate a large number of empty leaf nodes.
41 // To save storage we allocate a single trivial leaf node which
42 // contains no points. For messy coding reasons it is convenient
43 // to have it reference a trivial point index.
44 //
45 // KD_TRIVIAL is allocated when the first kd-tree is created. It
46 // must *never* deallocated (since it may be shared by more than
47 // one tree).
48 //----------------------------------------------------------------------
49 static int IDX_TRIVIAL[] = {0}; // trivial point index
50 ANNkd_leaf *KD_TRIVIAL = NULL; // trivial leaf node
51 
52 //----------------------------------------------------------------------
53 // Printing the kd-tree
54 // These routines print a kd-tree in reverse inorder (high then
55 // root then low). (This is so that if you look at the output
56 // from the right side it appear from left to right in standard
57 // inorder.) When outputting leaves we output only the point
58 // indices rather than the point coordinates. There is an option
59 // to print the point coordinates separately.
60 //
61 // The tree printing routine calls the printing routines on the
62 // individual nodes of the tree, passing in the level or depth
63 // in the tree. The level in the tree is used to print indentation
64 // for readability.
65 //----------------------------------------------------------------------
66 
67 void ANNkd_split::print( // print splitting node
68  int level, // depth of node in tree
69  ostream &out) // output stream
70 {
71  child[ANN_HI]->print(level+1, out); // print high child
72  out << " ";
73  for (int i = 0; i < level; i++) // print indentation
74  out << "..";
75  out << "Split cd=" << cut_dim << " cv=" << cut_val;
76  out << " lbnd=" << cd_bnds[ANN_LO];
77  out << " hbnd=" << cd_bnds[ANN_HI];
78  out << "\n";
79  child[ANN_LO]->print(level+1, out); // print low child
80 }
81 
82 void ANNkd_leaf::print( // print leaf node
83  int level, // depth of node in tree
84  ostream &out) // output stream
85 {
86 
87  out << " ";
88  for (int i = 0; i < level; i++) // print indentation
89  out << "..";
90 
91  if (this == KD_TRIVIAL) { // canonical trivial leaf node
92  out << "Leaf (trivial)\n";
93  }
94  else{
95  out << "Leaf n=" << n_pts << " <";
96  for (int j = 0; j < n_pts; j++) {
97  out << bkt[j];
98  if (j < n_pts-1) out << ",";
99  }
100  out << ">\n";
101  }
102 }
103 
104 void ANNkd_tree::Print( // print entire tree
105  ANNbool with_pts, // print points as well?
106  ostream &out) // output stream
107 {
108  out << "ANN Version " << ANNversion << "\n";
109  if (with_pts) { // print point coordinates
110  out << " Points:\n";
111  for (int i = 0; i < n_pts; i++) {
112  out << "\t" << i << ": ";
113  annPrintPt(pts[i], dim, out);
114  out << "\n";
115  }
116  }
117  if (root == NULL) // empty tree?
118  out << " Null tree.\n";
119  else {
120  root->print(0, out); // invoke printing at root
121  }
122 }
123 
124 //----------------------------------------------------------------------
125 // kd_tree statistics (for performance evaluation)
126 // This routine compute various statistics information for
127 // a kd-tree. It is used by the implementors for performance
128 // evaluation of the data structure.
129 //----------------------------------------------------------------------
130 
131 #define MAX(a,b) ((a) > (b) ? (a) : (b))
132 
133 void ANNkdStats::merge(const ANNkdStats &st) // merge stats from child
134 {
135  n_lf += st.n_lf; n_tl += st.n_tl;
136  n_spl += st.n_spl; n_shr += st.n_shr;
137  depth = MAX(depth, st.depth);
138  sum_ar += st.sum_ar;
139 }
140 
141 //----------------------------------------------------------------------
142 // Update statistics for nodes
143 //----------------------------------------------------------------------
144 
145 const double ANN_AR_TOOBIG = 1000; // too big an aspect ratio
146 
147 void ANNkd_leaf::getStats( // get subtree statistics
148  int dim, // dimension of space
149  ANNkdStats &st, // stats (modified)
150  ANNorthRect &bnd_box) // bounding box
151 {
152  st.reset();
153  st.n_lf = 1; // count this leaf
154  if (this == KD_TRIVIAL) st.n_tl = 1; // count trivial leaf
155  double ar = annAspectRatio(dim, bnd_box); // aspect ratio of leaf
156  // incr sum (ignore outliers)
157  st.sum_ar += float(ar < ANN_AR_TOOBIG ? ar : ANN_AR_TOOBIG);
158 }
159 
160 void ANNkd_split::getStats( // get subtree statistics
161  int dim, // dimension of space
162  ANNkdStats &st, // stats (modified)
163  ANNorthRect &bnd_box) // bounding box
164 {
165  ANNkdStats ch_stats; // stats for children
166  // get stats for low child
167  ANNcoord hv = bnd_box.hi[cut_dim]; // save box bounds
168  bnd_box.hi[cut_dim] = cut_val; // upper bound for low child
169  ch_stats.reset(); // reset
170  child[ANN_LO]->getStats(dim, ch_stats, bnd_box);
171  st.merge(ch_stats); // merge them
172  bnd_box.hi[cut_dim] = hv; // restore bound
173  // get stats for high child
174  ANNcoord lv = bnd_box.lo[cut_dim]; // save box bounds
175  bnd_box.lo[cut_dim] = cut_val; // lower bound for high child
176  ch_stats.reset(); // reset
177  child[ANN_HI]->getStats(dim, ch_stats, bnd_box);
178  st.merge(ch_stats); // merge them
179  bnd_box.lo[cut_dim] = lv; // restore bound
180 
181  st.depth++; // increment depth
182  st.n_spl++; // increment number of splits
183 }
184 
185 //----------------------------------------------------------------------
186 // getStats
187 // Collects a number of statistics related to kd_tree or
188 // bd_tree.
189 //----------------------------------------------------------------------
190 
191 void ANNkd_tree::getStats( // get tree statistics
192  ANNkdStats &st) // stats (modified)
193 {
194  st.reset(dim, n_pts, bkt_size); // reset stats
195  // create bounding box
197  if (root != NULL) { // if nonempty tree
198  root->getStats(dim, st, bnd_box); // get statistics
199  st.avg_ar = st.sum_ar / st.n_lf; // average leaf asp ratio
200  }
201 }
202 
203 //----------------------------------------------------------------------
204 // kd_tree destructor
205 // The destructor just frees the various elements that were
206 // allocated in the construction process.
207 //----------------------------------------------------------------------
208 
209 ANNkd_tree::~ANNkd_tree() // tree destructor
210 {
211  if (root != NULL) delete root;
212  if (pidx != NULL) delete [] pidx;
213  if (bnd_box_lo != NULL) annDeallocPt(bnd_box_lo);
214  if (bnd_box_hi != NULL) annDeallocPt(bnd_box_hi);
215 }
216 
217 //----------------------------------------------------------------------
218 // This is called with all use of ANN is finished. It eliminates the
219 // minor memory leak caused by the allocation of KD_TRIVIAL.
220 //----------------------------------------------------------------------
221 void annClose() // close use of ANN
222 {
223  if (KD_TRIVIAL != NULL) {
224  delete KD_TRIVIAL;
225  KD_TRIVIAL = NULL;
226  }
227 }
228 
229 //----------------------------------------------------------------------
230 // kd_tree constructors
231 // There is a skeleton kd-tree constructor which sets up a
232 // trivial empty tree. The last optional argument allows
233 // the routine to be passed a point index array which is
234 // assumed to be of the proper size (n). Otherwise, one is
235 // allocated and initialized to the identity. Warning: In
236 // either case the destructor will deallocate this array.
237 //
238 // As a kludge, we need to allocate KD_TRIVIAL if one has not
239 // already been allocated. (This is because I'm too dumb to
240 // figure out how to cause a pointer to be allocated at load
241 // time.)
242 //----------------------------------------------------------------------
243 
244 void ANNkd_tree::SkeletonTree( // construct skeleton tree
245  int n, // number of points
246  int dd, // dimension
247  int bs, // bucket size
248  ANNpointArray pa, // point array
249  ANNidxArray pi) // point indices
250 {
251  dim = dd; // initialize basic elements
252  n_pts = n;
253  bkt_size = bs;
254  pts = pa; // initialize points array
255 
256  root = NULL; // no associated tree yet
257 
258  if (pi == NULL) { // point indices provided?
259  pidx = new ANNidx[n]; // no, allocate space for point indices
260  for (int i = 0; i < n; i++) {
261  pidx[i] = i; // initially identity
262  }
263  }
264  else {
265  pidx = pi; // yes, use them
266  }
267 
268  bnd_box_lo = bnd_box_hi = NULL; // bounding box is nonexistent
269  if (KD_TRIVIAL == NULL) // no trivial leaf node yet?
270  KD_TRIVIAL = new ANNkd_leaf(0, IDX_TRIVIAL); // allocate it
271 }
272 
273 ANNkd_tree::ANNkd_tree( // basic constructor
274  int n, // number of points
275  int dd, // dimension
276  int bs) // bucket size
277 { SkeletonTree(n, dd, bs); } // construct skeleton tree
278 
279 //----------------------------------------------------------------------
280 // rkd_tree - recursive procedure to build a kd-tree
281 //
282 // Builds a kd-tree for points in pa as indexed through the
283 // array pidx[0..n-1] (typically a subarray of the array used in
284 // the top-level call). This routine permutes the array pidx,
285 // but does not alter pa[].
286 //
287 // The construction is based on a standard algorithm for constructing
288 // the kd-tree (see Friedman, Bentley, and Finkel, ``An algorithm for
289 // finding best matches in logarithmic expected time,'' ACM Transactions
290 // on Mathematical Software, 3(3):209-226, 1977). The procedure
291 // operates by a simple divide-and-conquer strategy, which determines
292 // an appropriate orthogonal cutting plane (see below), and splits
293 // the points. When the number of points falls below the bucket size,
294 // we simply store the points in a leaf node's bucket.
295 //
296 // One of the arguments is a pointer to a splitting routine,
297 // whose prototype is:
298 //
299 // void split(
300 // ANNpointArray pa, // complete point array
301 // ANNidxArray pidx, // point array (permuted on return)
302 // ANNorthRect &bnds, // bounds of current cell
303 // int n, // number of points
304 // int dim, // dimension of space
305 // int &cut_dim, // cutting dimension
306 // ANNcoord &cut_val, // cutting value
307 // int &n_lo) // no. of points on low side of cut
308 //
309 // This procedure selects a cutting dimension and cutting value,
310 // partitions pa about these values, and returns the number of
311 // points on the low side of the cut.
312 //----------------------------------------------------------------------
313 
314 ANNkd_ptr rkd_tree( // recursive construction of kd-tree
315  ANNpointArray pa, // point array
316  ANNidxArray pidx, // point indices to store in subtree
317  int n, // number of points
318  int dim, // dimension of space
319  int bsp, // bucket space
320  ANNorthRect &bnd_box, // bounding box for current node
321  ANNkd_splitter splitter) // splitting routine
322 {
323  if (n <= bsp) { // n small, make a leaf node
324  if (n == 0) // empty leaf node
325  return KD_TRIVIAL; // return (canonical) empty leaf
326  else // construct the node and return
327  return new ANNkd_leaf(n, pidx);
328  }
329  else { // n large, make a splitting node
330  int cd; // cutting dimension
331  ANNcoord cv; // cutting value
332  int n_lo; // number on low side of cut
333  ANNkd_node *lo, *hi; // low and high children
334 
335  // invoke splitting procedure
336  (*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo);
337 
338  ANNcoord lv = bnd_box.lo[cd]; // save bounds for cutting dimension
339  ANNcoord hv = bnd_box.hi[cd];
340 
341  bnd_box.hi[cd] = cv; // modify bounds for left subtree
342  lo = rkd_tree( // build left subtree
343  pa, pidx, n_lo, // ...from pidx[0..n_lo-1]
344  dim, bsp, bnd_box, splitter);
345  bnd_box.hi[cd] = hv; // restore bounds
346 
347  bnd_box.lo[cd] = cv; // modify bounds for right subtree
348  hi = rkd_tree( // build right subtree
349  pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1]
350  dim, bsp, bnd_box, splitter);
351  bnd_box.lo[cd] = lv; // restore bounds
352 
353  // create the splitting node
354  ANNkd_split *ptr = new ANNkd_split(cd, cv, lv, hv, lo, hi);
355 
356  return ptr; // return pointer to this node
357  }
358 }
359 
360 //----------------------------------------------------------------------
361 // kd-tree constructor
362 // This is the main constructor for kd-trees given a set of points.
363 // It first builds a skeleton tree, then computes the bounding box
364 // of the data points, and then invokes rkd_tree() to actually
365 // build the tree, passing it the appropriate splitting routine.
366 //----------------------------------------------------------------------
367 
368 ANNkd_tree::ANNkd_tree( // construct from point array
369  ANNpointArray pa, // point array (with at least n pts)
370  int n, // number of points
371  int dd, // dimension
372  int bs, // bucket size
373  ANNsplitRule split) // splitting method
374 {
375  SkeletonTree(n, dd, bs); // set up the basic stuff
376  pts = pa; // where the points are
377  if (n == 0) return; // no points--no sweat
378 
379  ANNorthRect bnd_box(dd); // bounding box for points
380  annEnclRect(pa, pidx, n, dd, bnd_box);// construct bounding rectangle
381  // copy to tree structure
382  bnd_box_lo = annCopyPt(dd, bnd_box.lo);
383  bnd_box_hi = annCopyPt(dd, bnd_box.hi);
384 
385  switch (split) { // build by rule
386  case ANN_KD_STD: // standard kd-splitting rule
387  root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, kd_split);
388  break;
389  case ANN_KD_MIDPT: // midpoint split
390  root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, midpt_split);
391  break;
392  case ANN_KD_FAIR: // fair split
393  root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, fair_split);
394  break;
395  case ANN_KD_SUGGEST: // best (in our opinion)
396  case ANN_KD_SL_MIDPT: // sliding midpoint split
397  root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, sl_midpt_split);
398  break;
399  case ANN_KD_SL_FAIR: // sliding fair split
400  root = rkd_tree(pa, pidx, n, dd, bs, bnd_box, sl_fair_split);
401  break;
402  default:
403  annError("Illegal splitting method", ANNabort);
404  }
405 }
void SkeletonTree(int n, int dd, int bs, ANNpointArray pa=NULL, ANNidxArray pi=NULL)
Definition: kd_tree.cpp:244
ANNidxArray pidx
Definition: ANN.h:711
DLL_API void annDeallocPt(ANNpoint &p)
Definition: ANN.cpp:127
ANNbool
Definition: ANN.h:132
void(* ANNkd_splitter)(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_tree.h:72
virtual void Print(ANNbool with_pts, std::ostream &out)
Definition: kd_tree.cpp:104
ANNcoord cd_bnds[2]
Definition: kd_tree.h:146
Definition: ANNx.h:48
virtual void getStats(ANNkdStats &st)
Definition: kd_tree.cpp:191
static int IDX_TRIVIAL[]
Definition: kd_tree.cpp:49
float sum_ar
Definition: ANNperf.h:55
int depth
Definition: ANNperf.h:54
ANNkd_ptr rkd_tree(ANNpointArray pa, ANNidxArray pidx, int n, int dim, int bsp, ANNorthRect &bnd_box, ANNkd_splitter splitter)
Definition: kd_tree.cpp:314
void sl_fair_split(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_split.cpp:346
double ANNcoord
Definition: ANN.h:158
int n_tl
Definition: ANNperf.h:51
virtual void getStats(int dim, ANNkdStats &st, ANNorthRect &bnd_box)
Definition: kd_tree.cpp:147
ANNkd_tree(int n=0, int dd=0, int bs=1)
Definition: kd_tree.cpp:273
const double ANN_AR_TOOBIG
Definition: kd_tree.cpp:145
virtual void print(int level, ostream &out)
Definition: kd_tree.cpp:67
int n_pts
Definition: kd_tree.h:93
ANNpoint bnd_box_lo
Definition: ANN.h:713
int bkt_size
Definition: ANN.h:709
Definition: ANNx.h:45
ANNkd_ptr root
Definition: ANN.h:712
DLL_API void annClose()
Definition: kd_tree.cpp:221
ANNkd_leaf * KD_TRIVIAL
Definition: kd_tree.cpp:50
int dim
Definition: ANN.h:707
ANNidxArray bkt
Definition: kd_tree.h:94
ANNkd_ptr child[2]
Definition: kd_tree.h:148
void annError(const char *msg, ANNerr level)
Definition: ANN.cpp:169
#define ANNversion
Definition: ANN.h:121
int n_pts
Definition: ANN.h:708
ANNpoint bnd_box_hi
Definition: ANN.h:714
ANNpoint * ANNpointArray
Definition: ANN.h:376
int n_spl
Definition: ANNperf.h:52
void sl_midpt_split(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_split.cpp:146
ANNsplitRule split
Definition: ann_test.cpp:491
void reset(int d=0, int n=0, int bs=0)
Definition: ANNperf.h:59
int ANNidx
Definition: ANN.h:175
ANNpoint lo
Definition: ANNx.h:93
double annAspectRatio(int dim, const ANNorthRect &bnd_box)
Definition: kd_util.cpp:52
int n_shr
Definition: ANNperf.h:53
int cut_dim
Definition: kd_tree.h:144
DLL_API ANNpoint annCopyPt(int dim, ANNpoint source)
Definition: ANN.cpp:140
virtual void print(int level, ostream &out)=0
ANNsplitRule
Definition: ANN.h:596
Definition: ANNx.h:45
void annEnclRect(ANNpointArray pa, ANNidxArray pidx, int n, int dim, ANNorthRect &bnds)
Definition: kd_util.cpp:73
float avg_ar
Definition: ANNperf.h:56
int dim
Definition: ann2fig.cpp:81
ANNpoint hi
Definition: ANNx.h:94
void merge(const ANNkdStats &st)
Definition: kd_tree.cpp:133
void midpt_split(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_split.cpp:76
virtual void getStats(int dim, ANNkdStats &st, ANNorthRect &bnd_box)
Definition: kd_tree.cpp:160
void kd_split(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_split.cpp:44
int n_lf
Definition: ANNperf.h:50
virtual void print(int level, ostream &out)
Definition: kd_tree.cpp:82
#define MAX(a, b)
Definition: kd_tree.cpp:131
ANNpointArray pts
Definition: ANN.h:710
virtual void getStats(int dim, ANNkdStats &st, ANNorthRect &bnd_box)=0
void annPrintPt(ANNpoint pt, int dim, std::ostream &out)
Definition: ANN.cpp:70
ANNidx * ANNidxArray
Definition: ANN.h:378
void fair_split(ANNpointArray pa, ANNidxArray pidx, const ANNorthRect &bnds, int n, int dim, int &cut_dim, ANNcoord &cut_val, int &n_lo)
Definition: kd_split.cpp:243
ANNcoord cut_val
Definition: kd_tree.h:145

Generated on Thu Jun 11 2015 13:52:31 for queso-0.53.0 by  doxygen 1.8.5