queso-0.57.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
ANN.h
Go to the documentation of this file.
1 //----------------------------------------------------------------------
2 // File: ANN.h
3 // Programmer: Sunil Arya and David Mount
4 // Description: Basic include file for approximate nearest
5 // neighbor searching.
6 // Last modified: 01/27/10 (Version 1.1.2)
7 //----------------------------------------------------------------------
8 // Copyright (c) 1997-2010 University of Maryland and Sunil Arya and
9 // David Mount. All Rights Reserved.
10 //
11 // This software and related documentation is part of the Approximate
12 // Nearest Neighbor Library (ANN). This software is provided under
13 // the provisions of the Lesser GNU Public License (LGPL). See the
14 // file ../ReadMe.txt for further information.
15 //
16 // The University of Maryland (U.M.) and the authors make no
17 // representations about the suitability or fitness of this software for
18 // any purpose. It is provided "as is" without express or implied
19 // warranty.
20 //----------------------------------------------------------------------
21 // History:
22 // Revision 0.1 03/04/98
23 // Initial release
24 // Revision 1.0 04/01/05
25 // Added copyright and revision information
26 // Added ANNcoordPrec for coordinate precision.
27 // Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet.
28 // Cleaned up C++ structure for modern compilers
29 // Revision 1.1 05/03/05
30 // Added fixed-radius k-NN searching
31 // Revision 1.1.2 01/27/10
32 // Fixed minor compilation bugs for new versions of gcc
33 //----------------------------------------------------------------------
34 
35 //----------------------------------------------------------------------
36 // ANN - approximate nearest neighbor searching
37 // ANN is a library for approximate nearest neighbor searching,
38 // based on the use of standard and priority search in kd-trees
39 // and balanced box-decomposition (bbd) trees. Here are some
40 // references to the main algorithmic techniques used here:
41 //
42 // kd-trees:
43 // Friedman, Bentley, and Finkel, ``An algorithm for finding
44 // best matches in logarithmic expected time,'' ACM
45 // Transactions on Mathematical Software, 3(3):209-226, 1977.
46 //
47 // Priority search in kd-trees:
48 // Arya and Mount, ``Algorithms for fast vector quantization,''
49 // Proc. of DCC '93: Data Compression Conference, eds. J. A.
50 // Storer and M. Cohn, IEEE Press, 1993, 381-390.
51 //
52 // Approximate nearest neighbor search and bbd-trees:
53 // Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal
54 // algorithm for approximate nearest neighbor searching,''
55 // 5th Ann. ACM-SIAM Symposium on Discrete Algorithms,
56 // 1994, 573-582.
57 //----------------------------------------------------------------------
58 
59 #ifndef ANN_H
60 #define ANN_H
61 
62 #ifdef WIN32
63  //----------------------------------------------------------------------
64  // For Microsoft Visual C++, externally accessible symbols must be
65  // explicitly indicated with DLL_API, which is somewhat like "extern."
66  //
67  // The following ifdef block is the standard way of creating macros
68  // which make exporting from a DLL simpler. All files within this DLL
69  // are compiled with the DLL_EXPORTS preprocessor symbol defined on the
70  // command line. In contrast, projects that use (or import) the DLL
71  // objects do not define the DLL_EXPORTS symbol. This way any other
72  // project whose source files include this file see DLL_API functions as
73  // being imported from a DLL, wheras this DLL sees symbols defined with
74  // this macro as being exported.
75  //----------------------------------------------------------------------
76  #ifdef DLL_EXPORTS
77  #define DLL_API __declspec(dllexport)
78  #else
79  #define DLL_API __declspec(dllimport)
80  #endif
81  //----------------------------------------------------------------------
82  // DLL_API is ignored for all other systems
83  //----------------------------------------------------------------------
84 #else
85  #define DLL_API
86 #endif
87 
88 //----------------------------------------------------------------------
89 // basic includes
90 //----------------------------------------------------------------------
91 
92 #include <cstdlib> // standard lib includes
93 #include <cmath> // math includes
94 #include <iostream> // I/O streams
95 #include <cstring> // C-style strings
96 
97 //----------------------------------------------------------------------
98 // Limits
99 // There are a number of places where we use the maximum double value as
100 // default initializers (and others may be used, depending on the
101 // data/distance representation). These can usually be found in limits.h
102 // (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX).
103 //
104 // Not all systems have these files. If you are using such a system,
105 // you should set the preprocessor symbol ANN_NO_LIMITS_H when
106 // compiling, and modify the statements below to generate the
107 // appropriate value. For practical purposes, this does not need to be
108 // the maximum double value. It is sufficient that it be at least as
109 // large than the maximum squared distance between between any two
110 // points.
111 //----------------------------------------------------------------------
112 #ifdef ANN_NO_LIMITS_H // limits.h unavailable
113  #include <cvalues> // replacement for limits.h
114  const double ANN_DBL_MAX = MAXDOUBLE; // insert maximum double
115 #else
116  #include <climits>
117  #include <cfloat>
118  const double ANN_DBL_MAX = DBL_MAX;
119 #endif
120 
121 #define ANNversion "1.1.2" // ANN version and information
122 #define ANNversionCmt ""
123 #define ANNcopyright "David M. Mount and Sunil Arya"
124 #define ANNlatestRev "Jan 27, 2010"
125 
126 //----------------------------------------------------------------------
127 // ANNbool
128 // This is a simple boolean type. Although ANSI C++ is supposed
129 // to support the type bool, some compilers do not have it.
130 //----------------------------------------------------------------------
131 
132 enum ANNbool {ANNfalse = 0, ANNtrue = 1}; // ANN boolean type (non ANSI C++)
133 
134 //----------------------------------------------------------------------
135 // ANNcoord, ANNdist
136 // ANNcoord and ANNdist are the types used for representing
137 // point coordinates and distances. They can be modified by the
138 // user, with some care. It is assumed that they are both numeric
139 // types, and that ANNdist is generally of an equal or higher type
140 // from ANNcoord. A variable of type ANNdist should be large
141 // enough to store the sum of squared components of a variable
142 // of type ANNcoord for the number of dimensions needed in the
143 // application. For example, the following combinations are
144 // legal:
145 //
146 // ANNcoord ANNdist
147 // --------- -------------------------------
148 // short short, int, long, float, double
149 // int int, long, float, double
150 // long long, float, double
151 // float float, double
152 // double double
153 //
154 // It is the user's responsibility to make sure that overflow does
155 // not occur in distance calculation.
156 //----------------------------------------------------------------------
157 
158 typedef double ANNcoord; // coordinate data type
159 typedef double ANNdist; // distance data type
160 
161 //----------------------------------------------------------------------
162 // ANNidx
163 // ANNidx is a point index. When the data structure is built, the
164 // points are given as an array. Nearest neighbor results are
165 // returned as an integer index into this array. To make it
166 // clearer when this is happening, we define the integer type
167 // ANNidx. Indexing starts from 0.
168 //
169 // For fixed-radius near neighbor searching, it is possible that
170 // there are not k nearest neighbors within the search radius. To
171 // indicate this, the algorithm returns ANN_NULL_IDX as its result.
172 // It should be distinguishable from any valid array index.
173 //----------------------------------------------------------------------
174 
175 typedef int ANNidx; // point index
176 const ANNidx ANN_NULL_IDX = -1; // a NULL point index
177 
178 //----------------------------------------------------------------------
179 // Infinite distance:
180 // The code assumes that there is an "infinite distance" which it
181 // uses to initialize distances before performing nearest neighbor
182 // searches. It should be as larger or larger than any legitimate
183 // nearest neighbor distance.
184 //
185 // On most systems, these should be found in the standard include
186 // file <limits.h> or possibly <float.h>. If you do not have these
187 // file, some suggested values are listed below, assuming 64-bit
188 // long, 32-bit int and 16-bit short.
189 //
190 // ANNdist ANN_DIST_INF Values (see <limits.h> or <float.h>)
191 // ------- ------------ ------------------------------------
192 // double DBL_MAX 1.79769313486231570e+308
193 // float FLT_MAX 3.40282346638528860e+38
194 // long LONG_MAX 0x7fffffffffffffff
195 // int INT_MAX 0x7fffffff
196 // short SHRT_MAX 0x7fff
197 //----------------------------------------------------------------------
198 
200 
201 //----------------------------------------------------------------------
202 // Significant digits for tree dumps:
203 // When floating point coordinates are used, the routine that dumps
204 // a tree needs to know roughly how many significant digits there
205 // are in a ANNcoord, so it can output points to full precision.
206 // This is defined to be ANNcoordPrec. On most systems these
207 // values can be found in the standard include files <limits.h> or
208 // <float.h>. For integer types, the value is essentially ignored.
209 //
210 // ANNcoord ANNcoordPrec Values (see <limits.h> or <float.h>)
211 // -------- ------------ ------------------------------------
212 // double DBL_DIG 15
213 // float FLT_DIG 6
214 // long doesn't matter 19
215 // int doesn't matter 10
216 // short doesn't matter 5
217 //----------------------------------------------------------------------
218 
219 #ifdef DBL_DIG // number of sig. bits in ANNcoord
220  const int ANNcoordPrec = DBL_DIG;
221 #else
222  const int ANNcoordPrec = 15; // default precision
223 #endif
224 
225 //----------------------------------------------------------------------
226 // Self match?
227 // In some applications, the nearest neighbor of a point is not
228 // allowed to be the point itself. This occurs, for example, when
229 // computing all nearest neighbors in a set. By setting the
230 // parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor
231 // is the closest point whose distance from the query point is
232 // strictly positive.
233 //----------------------------------------------------------------------
234 
236 
237 //----------------------------------------------------------------------
238 // Norms and metrics:
239 // ANN supports any Minkowski norm for defining distance. In
240 // particular, for any p >= 1, the L_p Minkowski norm defines the
241 // length of a d-vector (v0, v1, ..., v(d-1)) to be
242 //
243 // (|v0|^p + |v1|^p + ... + |v(d-1)|^p)^(1/p),
244 //
245 // (where ^ denotes exponentiation, and |.| denotes absolute
246 // value). The distance between two points is defined to be the
247 // norm of the vector joining them. Some common distance metrics
248 // include
249 //
250 // Euclidean metric p = 2
251 // Manhattan metric p = 1
252 // Max metric p = infinity
253 //
254 // In the case of the max metric, the norm is computed by taking
255 // the maxima of the absolute values of the components. ANN is
256 // highly "coordinate-based" and does not support general distances
257 // functions (e.g. those obeying just the triangle inequality). It
258 // also does not support distance functions based on
259 // inner-products.
260 //
261 // For the purpose of computing nearest neighbors, it is not
262 // necessary to compute the final power (1/p). Thus the only
263 // component that is used by the program is |v(i)|^p.
264 //
265 // ANN parameterizes the distance computation through the following
266 // macros. (Macros are used rather than procedures for
267 // efficiency.) Recall that the distance between two points is
268 // given by the length of the vector joining them, and the length
269 // or norm of a vector v is given by formula:
270 //
271 // |v| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1)))
272 //
273 // where ROOT, POW are unary functions and # is an associative and
274 // commutative binary operator mapping the following types:
275 //
276 // ** POW: ANNcoord --> ANNdist
277 // ** #: ANNdist x ANNdist --> ANNdist
278 // ** ROOT: ANNdist (>0) --> double
279 //
280 // For early termination in distance calculation (partial distance
281 // calculation) we assume that POW and # together are monotonically
282 // increasing on sequences of arguments, meaning that for all
283 // v0..vk and y:
284 //
285 // POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y).
286 //
287 // Incremental Distance Calculation:
288 // The program uses an optimized method of computing distances for
289 // kd-trees and bd-trees, called incremental distance calculation.
290 // It is used when distances are to be updated when only a single
291 // coordinate of a point has been changed. In order to use this,
292 // we assume that there is an incremental update function DIFF(x,y)
293 // for #, such that if:
294 //
295 // s = x0 # ... # xi # ... # xk
296 //
297 // then if s' is equal to s but with xi replaced by y, that is,
298 //
299 // s' = x0 # ... # y # ... # xk
300 //
301 // then the length of s' can be computed by:
302 //
303 // |s'| = |s| # DIFF(xi,y).
304 //
305 // Thus, if # is + then DIFF(xi,y) is (yi-x). For the L_infinity
306 // norm we make use of the fact that in the program this function
307 // is only invoked when y > xi, and hence DIFF(xi,y)=y.
308 //
309 // Finally, for approximate nearest neighbor queries we assume
310 // that POW and ROOT are related such that
311 //
312 // v*ROOT(x) = ROOT(POW(v)*x)
313 //
314 // Here are the values for the various Minkowski norms:
315 //
316 // L_p: p even: p odd:
317 // ------------------------- ------------------------
318 // POW(v) = v^p POW(v) = |v|^p
319 // ROOT(x) = x^(1/p) ROOT(x) = x^(1/p)
320 // # = + # = +
321 // DIFF(x,y) = y - x DIFF(x,y) = y - x
322 //
323 // L_inf:
324 // POW(v) = |v|
325 // ROOT(x) = x
326 // # = max
327 // DIFF(x,y) = y
328 //
329 // By default the Euclidean norm is assumed. To change the norm,
330 // uncomment the appropriate set of macros below.
331 //----------------------------------------------------------------------
332 
333 //----------------------------------------------------------------------
334 // Use the following for the Euclidean norm
335 //----------------------------------------------------------------------
336 // #define ANN_POW(v) ((v)*(v))
337 // #define ANN_ROOT(x) sqrt(x)
338 // #define ANN_SUM(x,y) ((x) + (y))
339 // #define ANN_DIFF(x,y) ((y) - (x))
340 
341 //----------------------------------------------------------------------
342 // Use the following for the L_1 (Manhattan) norm
343 //----------------------------------------------------------------------
344 // #define ANN_POW(v) fabs(v)
345 // #define ANN_ROOT(x) (x)
346 // #define ANN_SUM(x,y) ((x) + (y))
347 // #define ANN_DIFF(x,y) ((y) - (x))
348 
349 //----------------------------------------------------------------------
350 // Use the following for a general L_p norm
351 //----------------------------------------------------------------------
352 // #define ANN_POW(v) pow(fabs(v),p)
353 // #define ANN_ROOT(x) pow(fabs(x),1/p)
354 // #define ANN_SUM(x,y) ((x) + (y))
355 // #define ANN_DIFF(x,y) ((y) - (x))
356 
357 //----------------------------------------------------------------------
358 // Use the following for the L_infinity (Max) norm
359 //----------------------------------------------------------------------
360 #define ANN_POW(v) fabs(v)
361 #define ANN_ROOT(x) (x)
362 #define ANN_SUM(x,y) ((x) > (y) ? (x) : (y))
363 #define ANN_DIFF(x,y) (y)
364 
365 //----------------------------------------------------------------------
366 // Array types
367 // The following array types are of basic interest. A point is
368 // just a dimensionless array of coordinates, a point array is a
369 // dimensionless array of points. A distance array is a
370 // dimensionless array of distances and an index array is a
371 // dimensionless array of point indices. The latter two are used
372 // when returning the results of k-nearest neighbor queries.
373 //----------------------------------------------------------------------
374 
375 typedef ANNcoord* ANNpoint; // a point
376 typedef ANNpoint* ANNpointArray; // an array of points
377 typedef ANNdist* ANNdistArray; // an array of distances
378 typedef ANNidx* ANNidxArray; // an array of point indices
379 
380 //----------------------------------------------------------------------
381 // Basic point and array utilities:
382 // The following procedures are useful supplements to ANN's nearest
383 // neighbor capabilities.
384 //
385 // annDist():
386 // Computes the (squared) distance between a pair of points.
387 // Note that this routine is not used internally by ANN for
388 // computing distance calculations. For reasons of efficiency
389 // this is done using incremental distance calculation. Thus,
390 // this routine cannot be modified as a method of changing the
391 // metric.
392 //
393 // Because points (somewhat like strings in C) are stored as
394 // pointers. Consequently, creating and destroying copies of
395 // points may require storage allocation. These procedures do
396 // this.
397 //
398 // annAllocPt() and annDeallocPt():
399 // Allocate a deallocate storage for a single point, and
400 // return a pointer to it. The argument to AllocPt() is
401 // used to initialize all components.
402 //
403 // annAllocPts() and annDeallocPts():
404 // Allocate and deallocate an array of points as well a
405 // place to store their coordinates, and initializes the
406 // points to point to their respective coordinates. It
407 // allocates point storage in a contiguous block large
408 // enough to store all the points. It performs no
409 // initialization.
410 //
411 // annCopyPt():
412 // Creates a copy of a given point, allocating space for
413 // the new point. It returns a pointer to the newly
414 // allocated copy.
415 //----------------------------------------------------------------------
416 
417 DLL_API ANNdist annDist(
418  int dim, // dimension of space
419  ANNpoint p, // points
420  ANNpoint q);
421 
422 DLL_API ANNpoint annAllocPt(
423  int dim, // dimension
424  ANNcoord c = 0); // coordinate value (all equal)
425 
426 DLL_API ANNpointArray annAllocPts(
427  int n, // number of points
428  int dim); // dimension
429 
430 DLL_API void annDeallocPt(
431  ANNpoint &p); // deallocate 1 point
432 
433 DLL_API void annDeallocPts(
434  ANNpointArray &pa); // point array
435 
436 DLL_API ANNpoint annCopyPt(
437  int dim, // dimension
438  ANNpoint source); // point to copy
439 
440 //----------------------------------------------------------------------
441 //Overall structure: ANN supports a number of different data structures
442 //for approximate and exact nearest neighbor searching. These are:
443 //
444 // ANNbruteForce A simple brute-force search structure.
445 // ANNkd_tree A kd-tree tree search structure. ANNbd_tree
446 // A bd-tree tree search structure (a kd-tree with shrink
447 // capabilities).
448 //
449 // At a minimum, each of these data structures support k-nearest
450 // neighbor queries. The nearest neighbor query, annkSearch,
451 // returns an integer identifier and the distance to the nearest
452 // neighbor(s) and annRangeSearch returns the nearest points that
453 // lie within a given query ball.
454 //
455 // Each structure is built by invoking the appropriate constructor
456 // and passing it (at a minimum) the array of points, the total
457 // number of points and the dimension of the space. Each structure
458 // is also assumed to support a destructor and member functions
459 // that return basic information about the point set.
460 //
461 // Note that the array of points is not copied by the data
462 // structure (for reasons of space efficiency), and it is assumed
463 // to be constant throughout the lifetime of the search structure.
464 //
465 // The search algorithm, annkSearch, is given the query point (q),
466 // and the desired number of nearest neighbors to report (k), and
467 // the error bound (eps) (whose default value is 0, implying exact
468 // nearest neighbors). It returns two arrays which are assumed to
469 // contain at least k elements: one (nn_idx) contains the indices
470 // (within the point array) of the nearest neighbors and the other
471 // (dd) contains the squared distances to these nearest neighbors.
472 //
473 // The search algorithm, annkFRSearch, is a fixed-radius kNN
474 // search. In addition to a query point, it is given a (squared)
475 // radius bound. (This is done for consistency, because the search
476 // returns distances as squared quantities.) It does two things.
477 // First, it computes the k nearest neighbors within the radius
478 // bound, and second, it returns the total number of points lying
479 // within the radius bound. It is permitted to set k = 0, in which
480 // case it effectively answers a range counting query. If the
481 // error bound epsilon is positive, then the search is approximate
482 // in the sense that it is free to ignore any point that lies
483 // outside a ball of radius r/(1+epsilon), where r is the given
484 // (unsquared) radius bound.
485 //
486 // The generic object from which all the search structures are
487 // dervied is given below. It is a virtual object, and is useless
488 // by itself.
489 //----------------------------------------------------------------------
490 
491 class DLL_API ANNpointSet {
492 public:
493  virtual ~ANNpointSet() {} // virtual distructor
494 
495  virtual void annkSearch( // approx k near neighbor search
496  ANNpoint q, // query point
497  int k, // number of near neighbors to return
498  ANNidxArray nn_idx, // nearest neighbor array (modified)
499  ANNdistArray dd, // dist to near neighbors (modified)
500  double eps=0.0 // error bound
501  ) = 0; // pure virtual (defined elsewhere)
502 
503  virtual int annkFRSearch( // approx fixed-radius kNN search
504  ANNpoint q, // query point
505  ANNdist sqRad, // squared radius
506  int k = 0, // number of near neighbors to return
507  ANNidxArray nn_idx = NULL, // nearest neighbor array (modified)
508  ANNdistArray dd = NULL, // dist to near neighbors (modified)
509  double eps=0.0 // error bound
510  ) = 0; // pure virtual (defined elsewhere)
511 
512  virtual int theDim() = 0; // return dimension of space
513  virtual int nPoints() = 0; // return number of points
514  // return pointer to points
515  virtual ANNpointArray thePoints() = 0;
516 };
517 
518 //----------------------------------------------------------------------
519 // Brute-force nearest neighbor search:
520 // The brute-force search structure is very simple but inefficient.
521 // It has been provided primarily for the sake of comparison with
522 // and validation of the more complex search structures.
523 //
524 // Query processing is the same as described above, but the value
525 // of epsilon is ignored, since all distance calculations are
526 // performed exactly.
527 //
528 // WARNING: This data structure is very slow, and should not be
529 // used unless the number of points is very small.
530 //
531 // Internal information:
532 // ---------------------
533 // This data structure bascially consists of the array of points
534 // (each a pointer to an array of coordinates). The search is
535 // performed by a simple linear scan of all the points.
536 //----------------------------------------------------------------------
537 
538 class DLL_API ANNbruteForce: public ANNpointSet {
539  int dim; // dimension
540  int n_pts; // number of points
541  ANNpointArray pts; // point array
542 public:
543  ANNbruteForce( // constructor from point array
544  ANNpointArray pa, // point array
545  int n, // number of points
546  int dd); // dimension
547 
548  ~ANNbruteForce(); // destructor
549 
550  void annkSearch( // approx k near neighbor search
551  ANNpoint q, // query point
552  int k, // number of near neighbors to return
553  ANNidxArray nn_idx, // nearest neighbor array (modified)
554  ANNdistArray dd, // dist to near neighbors (modified)
555  double eps=0.0); // error bound
556 
557  int annkFRSearch( // approx fixed-radius kNN search
558  ANNpoint q, // query point
559  ANNdist sqRad, // squared radius
560  int k = 0, // number of near neighbors to return
561  ANNidxArray nn_idx = NULL, // nearest neighbor array (modified)
562  ANNdistArray dd = NULL, // dist to near neighbors (modified)
563  double eps=0.0); // error bound
564 
565  int theDim() // return dimension of space
566  { return dim; }
567 
568  int nPoints() // return number of points
569  { return n_pts; }
570 
571  ANNpointArray thePoints() // return pointer to points
572  { return pts; }
573 };
574 
575 //----------------------------------------------------------------------
576 // kd- and bd-tree splitting and shrinking rules
577 // kd-trees supports a collection of different splitting rules.
578 // In addition to the standard kd-tree splitting rule proposed
579 // by Friedman, Bentley, and Finkel, we have introduced a
580 // number of other splitting rules, which seem to perform
581 // as well or better (for the distributions we have tested).
582 //
583 // The splitting methods given below allow the user to tailor
584 // the data structure to the particular data set. They are
585 // are described in greater details in the kd_split.cc source
586 // file. The method ANN_KD_SUGGEST is the method chosen (rather
587 // subjectively) by the implementors as the one giving the
588 // fastest performance, and is the default splitting method.
589 //
590 // As with splitting rules, there are a number of different
591 // shrinking rules. The shrinking rule ANN_BD_NONE does no
592 // shrinking (and hence produces a kd-tree tree). The rule
593 // ANN_BD_SUGGEST uses the implementors favorite rule.
594 //----------------------------------------------------------------------
595 
597  ANN_KD_STD = 0, // the optimized kd-splitting rule
598  ANN_KD_MIDPT = 1, // midpoint split
599  ANN_KD_FAIR = 2, // fair split
600  ANN_KD_SL_MIDPT = 3, // sliding midpoint splitting method
601  ANN_KD_SL_FAIR = 4, // sliding fair split method
602  ANN_KD_SUGGEST = 5}; // the authors' suggestion for best
603 const int ANN_N_SPLIT_RULES = 6; // number of split rules
604 
606  ANN_BD_NONE = 0, // no shrinking at all (just kd-tree)
607  ANN_BD_SIMPLE = 1, // simple splitting
608  ANN_BD_CENTROID = 2, // centroid splitting
609  ANN_BD_SUGGEST = 3}; // the authors' suggested choice
610 const int ANN_N_SHRINK_RULES = 4; // number of shrink rules
611 
612 //----------------------------------------------------------------------
613 // kd-tree:
614 // The main search data structure supported by ANN is a kd-tree.
615 // The main constructor is given a set of points and a choice of
616 // splitting method to use in building the tree.
617 //
618 // Construction:
619 // -------------
620 // The constructor is given the point array, number of points,
621 // dimension, bucket size (default = 1), and the splitting rule
622 // (default = ANN_KD_SUGGEST). The point array is not copied, and
623 // is assumed to be kept constant throughout the lifetime of the
624 // search structure. There is also a "load" constructor that
625 // builds a tree from a file description that was created by the
626 // Dump operation.
627 //
628 // Search:
629 // -------
630 // There are two search methods:
631 //
632 // Standard search (annkSearch()):
633 // Searches nodes in tree-traversal order, always visiting
634 // the closer child first.
635 // Priority search (annkPriSearch()):
636 // Searches nodes in order of increasing distance of the
637 // associated cell from the query point. For many
638 // distributions the standard search seems to work just
639 // fine, but priority search is safer for worst-case
640 // performance.
641 //
642 // Printing:
643 // ---------
644 // There are two methods provided for printing the tree. Print()
645 // is used to produce a "human-readable" display of the tree, with
646 // indenation, which is handy for debugging. Dump() produces a
647 // format that is suitable reading by another program. There is a
648 // "load" constructor, which constructs a tree which is assumed to
649 // have been saved by the Dump() procedure.
650 //
651 // Performance and Structure Statistics:
652 // -------------------------------------
653 // The procedure getStats() collects statistics information on the
654 // tree (its size, height, etc.) See ANNperf.h for information on
655 // the stats structure it returns.
656 //
657 // Internal information:
658 // ---------------------
659 // The data structure consists of three major chunks of storage.
660 // The first (implicit) storage are the points themselves (pts),
661 // which have been provided by the users as an argument to the
662 // constructor, or are allocated dynamically if the tree is built
663 // using the load constructor). These should not be changed during
664 // the lifetime of the search structure. It is the user's
665 // responsibility to delete these after the tree is destroyed.
666 //
667 // The second is the tree itself (which is dynamically allocated in
668 // the constructor) and is given as a pointer to its root node
669 // (root). These nodes are automatically deallocated when the tree
670 // is deleted. See the file src/kd_tree.h for further information
671 // on the structure of the tree nodes.
672 //
673 // Each leaf of the tree does not contain a pointer directly to a
674 // point, but rather contains a pointer to a "bucket", which is an
675 // array consisting of point indices. The third major chunk of
676 // storage is an array (pidx), which is a large array in which all
677 // these bucket subarrays reside. (The reason for storing them
678 // separately is the buckets are typically small, but of varying
679 // sizes. This was done to avoid fragmentation.) This array is
680 // also deallocated when the tree is deleted.
681 //
682 // In addition to this, the tree consists of a number of other
683 // pieces of information which are used in searching and for
684 // subsequent tree operations. These consist of the following:
685 //
686 // dim Dimension of space
687 // n_pts Number of points currently in the tree
688 // n_max Maximum number of points that are allowed
689 // in the tree
690 // bkt_size Maximum bucket size (no. of points per leaf)
691 // bnd_box_lo Bounding box low point
692 // bnd_box_hi Bounding box high point
693 // splitRule Splitting method used
694 //
695 //----------------------------------------------------------------------
696 
697 //----------------------------------------------------------------------
698 // Some types and objects used by kd-tree functions
699 // See src/kd_tree.h and src/kd_tree.cpp for definitions
700 //----------------------------------------------------------------------
701 class ANNkdStats; // stats on kd-tree
702 class ANNkd_node; // generic node in a kd-tree
703 typedef ANNkd_node* ANNkd_ptr; // pointer to a kd-tree node
704 
705 class DLL_API ANNkd_tree: public ANNpointSet {
706 protected:
707  int dim; // dimension of space
708  int n_pts; // number of points in tree
709  int bkt_size; // bucket size
710  ANNpointArray pts; // the points
711  ANNidxArray pidx; // point indices (to pts array)
712  ANNkd_ptr root; // root of kd-tree
713  ANNpoint bnd_box_lo; // bounding box low point
714  ANNpoint bnd_box_hi; // bounding box high point
715 
716  void SkeletonTree( // construct skeleton tree
717  int n, // number of points
718  int dd, // dimension
719  int bs, // bucket size
720  ANNpointArray pa = NULL, // point array (optional)
721  ANNidxArray pi = NULL); // point indices (optional)
722 
723 public:
724  ANNkd_tree( // build skeleton tree
725  int n = 0, // number of points
726  int dd = 0, // dimension
727  int bs = 1); // bucket size
728 
729  ANNkd_tree( // build from point array
730  ANNpointArray pa, // point array
731  int n, // number of points
732  int dd, // dimension
733  int bs = 1, // bucket size
734  ANNsplitRule split = ANN_KD_SUGGEST); // splitting method
735 
736  ANNkd_tree( // build from dump file
737  std::istream& in); // input stream for dump file
738 
739  ~ANNkd_tree(); // tree destructor
740 
741  void annkSearch( // approx k near neighbor search
742  ANNpoint q, // query point
743  int k, // number of near neighbors to return
744  ANNidxArray nn_idx, // nearest neighbor array (modified)
745  ANNdistArray dd, // dist to near neighbors (modified)
746  double eps=0.0); // error bound
747 
748  void annkPriSearch( // priority k near neighbor search
749  ANNpoint q, // query point
750  int k, // number of near neighbors to return
751  ANNidxArray nn_idx, // nearest neighbor array (modified)
752  ANNdistArray dd, // dist to near neighbors (modified)
753  double eps=0.0); // error bound
754 
755  int annkFRSearch( // approx fixed-radius kNN search
756  ANNpoint q, // the query point
757  ANNdist sqRad, // squared radius of query ball
758  int k, // number of neighbors to return
759  ANNidxArray nn_idx = NULL, // nearest neighbor array (modified)
760  ANNdistArray dd = NULL, // dist to near neighbors (modified)
761  double eps=0.0); // error bound
762 
763  int theDim() // return dimension of space
764  { return dim; }
765 
766  int nPoints() // return number of points
767  { return n_pts; }
768 
769  ANNpointArray thePoints() // return pointer to points
770  { return pts; }
771 
772  virtual void Print( // print the tree (for debugging)
773  ANNbool with_pts, // print points as well?
774  std::ostream& out); // output stream
775 
776  virtual void Dump( // dump entire tree
777  ANNbool with_pts, // print points as well?
778  std::ostream& out); // output stream
779 
780  virtual void getStats( // compute tree statistics
781  ANNkdStats& st); // the statistics (modified)
782 };
783 
784 //----------------------------------------------------------------------
785 // Box decomposition tree (bd-tree)
786 // The bd-tree is inherited from a kd-tree. The main difference
787 // in the bd-tree and the kd-tree is a new type of internal node
788 // called a shrinking node (in the kd-tree there is only one type
789 // of internal node, a splitting node). The shrinking node
790 // makes it possible to generate balanced trees in which the
791 // cells have bounded aspect ratio, by allowing the decomposition
792 // to zoom in on regions of dense point concentration. Although
793 // this is a nice idea in theory, few point distributions are so
794 // densely clustered that this is really needed.
795 //----------------------------------------------------------------------
796 
797 class DLL_API ANNbd_tree: public ANNkd_tree {
798 public:
799  ANNbd_tree( // build skeleton tree
800  int n, // number of points
801  int dd, // dimension
802  int bs = 1) // bucket size
803  : ANNkd_tree(n, dd, bs) {} // build base kd-tree
804 
805  ANNbd_tree( // build from point array
806  ANNpointArray pa, // point array
807  int n, // number of points
808  int dd, // dimension
809  int bs = 1, // bucket size
810  ANNsplitRule split = ANN_KD_SUGGEST, // splitting rule
811  ANNshrinkRule shrink = ANN_BD_SUGGEST); // shrinking rule
812 
813  ANNbd_tree( // build from dump file
814  std::istream& in); // input stream for dump file
815 };
816 
817 //----------------------------------------------------------------------
818 // Other functions
819 // annMaxPtsVisit Sets a limit on the maximum number of points
820 // to visit in the search.
821 // annClose Can be called when all use of ANN is finished.
822 // It clears up a minor memory leak.
823 //----------------------------------------------------------------------
824 
825 DLL_API void annMaxPtsVisit( // max. pts to visit in search
826  int maxPts); // the limit
827 
828 DLL_API void annClose(); // called to end use of ANN
829 
830 #endif
DLL_API void annClose()
Definition: kd_tree.cpp:221
ANNidxArray pidx
Definition: ANN.h:711
int nPoints()
Definition: ANN.h:766
int ANNidx
Definition: ANN.h:175
ANNpoint * ANNpointArray
Definition: ANN.h:376
DLL_API void annDeallocPt(ANNpoint &p)
Definition: ANN.cpp:127
const int ANN_N_SPLIT_RULES
Definition: ANN.h:603
ANNpointArray pts
Definition: ANN.h:541
double ANNcoord
Definition: ANN.h:158
ANNbd_tree(int n, int dd, int bs=1)
Definition: ANN.h:799
double ANNdist
Definition: ANN.h:159
int nPoints()
Definition: ANN.h:568
ANNpointArray thePoints()
Definition: ANN.h:571
virtual int annkFRSearch(ANNpoint q, ANNdist sqRad, int k=0, ANNidxArray nn_idx=NULL, ANNdistArray dd=NULL, double eps=0.0)=0
int theDim()
Definition: ANN.h:565
ANNcoord * ANNpoint
Definition: ANN.h:375
const ANNidx ANN_NULL_IDX
Definition: ANN.h:176
int n_pts
Definition: ANN.h:540
const int ANN_N_SHRINK_RULES
Definition: ANN.h:610
ANNpoint bnd_box_lo
Definition: ANN.h:713
int bkt_size
Definition: ANN.h:709
const int ANNcoordPrec
Definition: ANN.h:220
ANNkd_ptr root
Definition: ANN.h:712
virtual void annkSearch(ANNpoint q, int k, ANNidxArray nn_idx, ANNdistArray dd, double eps=0.0)=0
DLL_API void annDeallocPts(ANNpointArray &pa)
Definition: ANN.cpp:133
int dim
Definition: ANN.h:707
int dim
Definition: ANN.h:539
int n_pts
Definition: ANN.h:708
ANNpoint bnd_box_hi
Definition: ANN.h:714
const double ANN_DBL_MAX
Definition: ANN.h:114
DLL_API ANNpoint annAllocPt(int dim, ANNcoord c=0)
Definition: ANN.cpp:110
ANNsplitRule split
Definition: ann_test.cpp:491
DLL_API ANNdist annDist(int dim, ANNpoint p, ANNpoint q)
Definition: ANN.cpp:46
ANNdist * ANNdistArray
Definition: ANN.h:377
ANNshrinkRule shrink
Definition: ann_test.cpp:492
ANNkd_node * ANNkd_ptr
Definition: ANN.h:702
virtual ~ANNpointSet()
Definition: ANN.h:493
int dim
Definition: ann_test.cpp:472
Definition: ANN.h:132
int theDim()
Definition: ANN.h:763
DLL_API ANNpoint annCopyPt(int dim, ANNpoint source)
Definition: ANN.cpp:140
DLL_API void annMaxPtsVisit(int maxPts)
Definition: ANN.cpp:197
ANNsplitRule
Definition: ANN.h:596
ANNpointArray thePoints()
Definition: ANN.h:769
ANNidx * ANNidxArray
Definition: ANN.h:378
DLL_API ANNpointArray annAllocPts(int n, int dim)
Definition: ANN.cpp:117
Definition: ANN.h:132
const ANNbool ANN_ALLOW_SELF_MATCH
Definition: ANN.h:235
const ANNdist ANN_DIST_INF
Definition: ANN.h:199
ANNbool
Definition: ANN.h:132
ANNpointArray pts
Definition: ANN.h:710
ANNshrinkRule
Definition: ANN.h:605

Generated on Tue Jun 5 2018 19:48:54 for queso-0.57.1 by  doxygen 1.8.5