51 #ifndef CLOCKS_PER_SEC // define clocks-per-second if needed
52 #define CLOCKS_PER_SEC 1000000
284 const double ERR = 0.00001;
381 cerr <<
"ann_test: ERROR------->" << msg <<
"<-------------ERROR\n";
385 cerr <<
"ann_test: WARNING----->" << msg <<
"<-------------WARNING\n";
394 for (
int i = 0; i <
dim; i++) {
396 if (i < dim-1) cout <<
",";
407 for (i = 0; i < size; i++) {
408 if (!strcmp(arg,
table[i]))
return i;
599 do { in.get(ch); }
while (isspace(ch) && !in.eof());
600 while (ch ==
'#' && !in.eof()) {
602 do { in.get(ch); }
while(ch !=
'\n' && !in.eof());
604 do { in.get(ch); }
while(isspace(ch) && !in.eof());
628 int main(
int argc,
char** argv)
634 cout <<
"------------------------------------------------------------\n"
638 <<
"------------------------------------------------------------\n\n";
650 if (!strcmp(directive,
"dim")) {
653 else if (!strcmp(directive,
"colors")) {
656 else if (!strcmp(directive,
"new_clust")) {
659 else if (!strcmp(directive,
"max_clus_dim")) {
662 else if (!strcmp(directive,
"std_dev")) {
665 else if (!strcmp(directive,
"std_dev_lo")) {
668 else if (!strcmp(directive,
"std_dev_hi")) {
671 else if (!strcmp(directive,
"corr_coef")) {
674 else if (!strcmp(directive,
"data_size")) {
677 else if (!strcmp(directive,
"query_size")) {
680 else if (!strcmp(directive,
"bucket_size")) {
683 else if (!strcmp(directive,
"epsilon")) {
686 else if (!strcmp(directive,
"max_pts_visit")) {
690 else if (!strcmp(directive,
"radius_bound")) {
694 else if (!strcmp(directive,
"near_neigh")) {
699 else if (!strcmp(directive,
"true_near_neigh")) {
708 else if (!strcmp(directive,
"seed")) {
715 else if (!strcmp(directive,
"validate")) {
717 if (!strcmp(arg,
"on")) {
719 cout <<
"validate = on "
720 <<
"(Warning: this may slow execution time.)\n";
722 else if (!strcmp(arg,
"off")) {
726 cerr <<
"Argument: " << arg <<
"\n";
727 Error(
"validate argument must be \"on\" or \"off\"",
ANNabort);
733 else if (!strcmp(directive,
"distribution")) {
737 cerr <<
"Distribution: " << arg <<
"\n";
744 else if (!strcmp(directive,
"stats")) {
748 cerr <<
"Stats level: " << arg <<
"\n";
752 cout <<
"stats = " << arg <<
"\n";
757 else if (!strcmp(directive,
"split_rule")) {
761 cerr <<
"Splitting rule: " << arg <<
"\n";
768 else if (!strcmp(directive,
"shrink_rule")) {
772 cerr <<
"Shrinking rule: " << arg <<
"\n";
779 else if (!strcmp(directive,
"output_label")) {
782 cout <<
"<" << arg <<
">\n";
787 else if (!strcmp(directive,
"gen_data_pts")) {
789 Error(
"Cannot use planted distribution for data points",
ANNabort);
805 else if (!strcmp(directive,
"gen_query_pts")) {
808 Error(
"Must generate data points before query points for planted distribution",
ANNabort);
831 else if (!strcmp(directive,
"read_data_pts")) {
843 else if (!strcmp(directive,
"read_query_pts")) {
858 else if (!strcmp(directive,
"build_ann")) {
878 long prep_time = clock() - clock0;
881 cout <<
"[Build ann-structure:\n";
884 cout <<
" data_size = " <<
data_size <<
"\n";
885 cout <<
" dim = " <<
dim <<
"\n";
889 cout <<
" process_time = "
899 cout <<
" (Structure Contents:\n";
909 else if (!strcmp(directive,
"dump")) {
912 Error(
"Cannot dump. No tree has been built yet",
ANNwarn);
916 ofstream out_dump_file(arg);
917 if (!out_dump_file) {
918 cerr <<
"File name: " << arg <<
"\n";
924 cout <<
"(Tree has been dumped to file " << arg <<
")\n";
933 else if (!strcmp(directive,
"load")) {
942 ifstream in_dump_file(arg);
944 cerr <<
"File name: " << arg <<
"\n";
957 cout <<
"(Tree has been loaded from file " << arg <<
")\n";
960 cout <<
" (Structure Contents:\n";
984 else if (!strcmp(directive,
"run_queries")) {
989 enum {STANDARD, PRIORITY}
method;
992 if (!strcmp(arg,
"standard")) {
995 else if (!strcmp(arg,
"priority")) {
999 cerr <<
"Search type: " << arg <<
"\n";
1000 Error(
"Search type must be \"standard\" or \"priority\"",
1004 Error(
"Either data set and query set not constructed",
ANNabort);
1014 #ifdef ANN_PERF // performance only
1044 if (
method == STANDARD) {
1052 else if (
method == PRIORITY) {
1065 if (
method != STANDARD) {
1066 Error(
"A nonzero radius bound assumes standard search",
1085 long query_time = clock() - clock0;
1097 cout <<
"[Run Queries:\n";
1098 cout <<
" query_size = " << query_size <<
"\n";
1099 cout <<
" dim = " <<
dim <<
"\n";
1100 cout <<
" search_method = " << arg <<
"\n";
1101 cout <<
" epsilon = " <<
epsilon <<
"\n";
1102 cout <<
" near_neigh = " <<
near_neigh <<
"\n";
1108 cout <<
" true_nn = " <<
true_nn <<
"\n";
1111 cout <<
" query_time = " <<
1115 cout <<
" (biased by perf measurements)";
1125 cout <<
" (Performance statistics unavailable.)\n";
1130 cout <<
" (Query Results:\n";
1131 cout <<
" Pt\tANN\tDist\n";
1136 cout <<
" " << setw(4) << i;
1140 cout <<
"\t[no other pts in radius bound]\n";
1144 cout <<
"\t" << curr_nn_idx[j]
1150 if (radius_bound != 0) {
1151 cout <<
" pts_in_radius_bound = "
1167 cerr <<
"Directive: " << directive <<
"\n";
1183 return EXIT_SUCCESS;
1237 if(type ==
DATA) cout <<
"[Generating Data Points:\n";
1238 else cout <<
"[Generating Query Points:\n";
1239 cout <<
" number = " << n <<
"\n";
1240 cout <<
" dim = " <<
dim <<
"\n";
1243 cout <<
" seed = " <<
annIdum <<
"\n";
1246 cout <<
" std_dev = " <<
std_dev <<
"\n";
1248 cout <<
" std_dev = " << std_dev <<
" (small) \n";
1249 cout <<
" std_dev_lo = " <<
std_dev_lo <<
"\n";
1250 cout <<
" std_dev_hi = " <<
std_dev_hi <<
"\n";
1253 cout <<
" corr_coef = " <<
corr_coef <<
"\n";
1256 cout <<
" colors = " <<
n_color <<
"\n";
1258 cout <<
" (cluster centers regenerated)\n";
1261 cout <<
" max_dim = " <<
max_dim <<
"\n";
1267 if(type ==
DATA) cout <<
"(Data Points:\n";
1268 else cout <<
"(Query Points:\n";
1269 for (
int i = 0; i < n; i++) {
1270 cout <<
" " << setw(4) << i <<
"\t";
1293 ifstream in_file(file_nm);
1295 cerr <<
"File name: " << file_nm <<
"\n";
1302 for (i = 0; i < n; i++) {
1303 if (!(in_file >> pa[i][0]))
break;
1304 for (
int d = 1; d <
dim; d++) {
1305 in_file >> pa[i][d];
1310 in_file >> ignore_me;
1311 if (!in_file.eof()) {
1313 Error(
"`data_size' too small. Input file truncated.",
ANNwarn);
1315 Error(
"`query_size' too small. Input file truncated.",
ANNwarn);
1324 cout <<
"[Read Data Points:\n";
1325 cout <<
" data_size = " << n <<
"\n";
1328 cout <<
"[Read Query Points:\n";
1329 cout <<
" query_size = " << n <<
"\n";
1331 cout <<
" file_name = " << file_nm <<
"\n";
1332 cout <<
" dim = " <<
dim <<
"\n";
1336 cout <<
" (Points:\n";
1337 for (i = 0; i < n; i++) {
1338 cout <<
" " << i <<
"\t";
1377 cout <<
"(Computing true nearest neighbors for validation. This may take time.)\n";
1476 Error(
"Cannot validate with fewer true near neighbors than actual",
ANNabort);
1501 double true_dist =
ANN_ROOT(curr_tru_dst[j]);
1503 double rept_dist =
ANN_ROOT(curr_apx_dst[j]);
1505 if (rept_dist < true_dist*(1-
ERR)) {
1506 Error(
"INTERNAL ERROR: True nearest neighbor incorrect",
1511 if (true_dist == 0.0) {
1513 else resultErr = 0.0;
1516 resultErr = (rept_dist - true_dist) / ((
double) true_dist);
1520 Error(
"INTERNAL ERROR: Actual error exceeds epsilon",
1535 double rnkErr = 0.0;
1537 ANNdist rept_dist = curr_apx_dst[j];
1539 while (rnk <
true_nn && curr_tru_dst[rnk] <= rept_dist)
1541 if (j+1-rnk > 0) rnkErr = (double) (j+1-rnk);
1551 Error(
"INTERNAL ERROR: Invalid fixed-radius range count",
1570 #define log2(x) (log(x)/log(2.0)) // log base 2
1576 const int MIN_PTS = 20;
1577 const float MAX_FRAC_TL = 0.50;
1578 const float MAX_AVG_AR = 20;
1587 int too_many_nodes = 10*opt_n_nodes;
1588 if (st.
n_pts >= MIN_PTS && n_nodes > too_many_nodes) {
1589 out <<
"-----------------------------------------------------------\n";
1590 out <<
"Warning: The tree has more than 10x as many nodes as points.\n";
1591 out <<
"You may want to consider a different split or shrink method.\n";
1592 out <<
"-----------------------------------------------------------\n";
1596 float frac_tl = (st.
n_lf == 0 ? 0 : ((float) st.
n_tl)/ st.
n_lf);
1597 if (st.
n_pts >= MIN_PTS && frac_tl > MAX_FRAC_TL) {
1598 out <<
"-----------------------------------------------------------\n";
1599 out <<
"Warning: A significant fraction of leaves contain no points.\n";
1600 out <<
"You may want to consider a different split or shrink method.\n";
1601 out <<
"-----------------------------------------------------------\n";
1605 int too_many_levels = (int) (2.0 * st.
dim *
log2((
double) st.
n_pts));
1607 if (st.
n_pts >= MIN_PTS && st.
depth > too_many_levels) {
1608 out <<
"-----------------------------------------------------------\n";
1609 out <<
"Warning: The tree is more than 2x as deep as (dim*log n).\n";
1610 out <<
"You may want to consider a different split or shrink method.\n";
1611 out <<
"-----------------------------------------------------------\n";
1615 if (st.
n_pts >= MIN_PTS && st.
avg_ar > MAX_AVG_AR) {
1616 out <<
"-----------------------------------------------------------\n";
1617 out <<
"Warning: Average aspect ratio of cells is quite large.\n";
1618 out <<
"This may slow queries depending on the point distribution.\n";
1619 out <<
"-----------------------------------------------------------\n";
1627 out <<
" (Structure Statistics:\n";
1628 out <<
" n_nodes = " << n_nodes
1629 <<
" (opt = " << opt_n_nodes
1630 <<
", best if < " << too_many_nodes <<
")\n"
1631 <<
" n_leaves = " << st.
n_lf
1632 <<
" (" << st.
n_tl <<
" contain no points)\n"
1633 <<
" n_splits = " << st.
n_spl <<
"\n"
1634 <<
" n_shrinks = " << st.
n_shr <<
"\n";
1635 out <<
" empty_leaves = " << frac_tl*100
1636 <<
" percent (best if < " << MAX_FRAC_TL*100 <<
" percent)\n";
1637 out <<
" depth = " << st.
depth
1638 <<
" (opt = " << opt_levels
1639 <<
", best if < " << too_many_levels <<
")\n";
1640 out <<
" avg_aspect_ratio = " << st.
avg_ar
1641 <<
" (best if < " << MAX_AVG_AR <<
")\n";
const char shrink_table[N_SHRINK_RULES][STRING_LEN]
DLL_API void annPrintStats(ANNbool validate)
void annClusGaussPts(ANNpointArray pa, int n, int dim, int n_clus, ANNbool new_clust, double std_dev)
and that you are informed that you can do these things To protect your we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it For if you distribute copies of the whether gratis or for a you must give the recipients all the rights that we gave you You must make sure that receive or can get the source code If you link other code with the you must provide complete object files to the so that they can relink them with the library after making changes to the library and recompiling it And you must show them these terms so they know their rights We protect your rights with a two step method
virtual void Print(ANNbool with_pts, std::ostream &out)
void printPoint(ANNpoint p, int dim)
virtual void getStats(ANNkdStats &st)
DLL_API void annResetStats(int data_size)
int annkFRSearch(ANNpoint q, ANNdist sqRad, int k, ANNidxArray nn_idx=NULL, ANNdistArray dd=NULL, double eps=0.0)
const ANNshrinkRule def_shrink
DLL_API ANNsampStat ann_average_err
DLL_API void annMaxPtsVisit(int maxPts)
const double def_corr_coef
void annkSearch(ANNpoint q, int k, ANNidxArray nn_idx, ANNdistArray dd, double eps=0.0)
const ANNbool def_validate
const ANNsplitRule def_split
const ANNidx ANN_NULL_IDX
DLL_API void annUpdateStats()
int annkFRSearch(ANNpoint q, ANNdist sqRad, int k=0, ANNidxArray nn_idx=NULL, ANNdistArray dd=NULL, double eps=0.0)
const int def_bucket_size
DLL_API void annDeallocPts(ANNpointArray &pa)
void generatePts(ANNpointArray &pa, int n, PtType type, ANNbool new_clust, ANNpointArray src=NULL, int n_src=0)
void annkPriSearch(ANNpoint q, int k, ANNidxArray nn_idx, ANNdistArray dd, double eps=0.0)
main(int argc, char **argv)
ANNbool getDirective(istream &in, char *directive)
void annPlanted(ANNpointArray pa, int n, int dim, ANNpointArray src, int n_src, double std_dev)
void treeStats(ostream &out, ANNbool verbose)
void readPts(ANNpointArray &pa, int &n, char *file_nm, PtType type)
void annClusEllipsoids(ANNpointArray pa, int n, int dim, int n_clus, ANNbool new_clust, double std_dev_small, double std_dev_lo, double std_dev_hi, int max_dim)
void annLaplacePts(ANNpointArray pa, int n, int dim)
void annGaussPts(ANNpointArray pa, int n, int dim, double std_dev)
const char split_table[N_SPLIT_RULES][STRING_LEN]
void Error(const char *msg, ANNerr level)
const char distr_table[N_DISTRIBS][STRING_LEN]
void annCoGaussPts(ANNpointArray pa, int n, int dim, double correlation)
const ANNbool def_new_clust
ANNpointArray thePoints()
ANNbool skipComment(istream &in)
const char stat_table[N_STAT_LEVELS][STRING_LEN]
int lookUp(const char *arg, const char(*table)[STRING_LEN], int size)
DLL_API ANNsampStat ann_rank_err
void annUniformPts(ANNpointArray pa, int n, int dim)
DLL_API ANNpointArray annAllocPts(int n, int dim)
virtual void Dump(ANNbool with_pts, std::ostream &out)
DLL_API void annResetCounts()
and distribute a copy of this License along with the Library You may charge a fee for the physical act of transferring a and you may at your option offer warranty protection in exchange for a fee You may modify your copy or copies of the Library or any portion of thus forming a work based on the and copy and distribute such modifications or work under the terms of Section provided that you also meet all of these other than as an argument passed when the facility is then you must make a good faith effort to ensure in the event an application does not supply such function or table
void annkSearch(ANNpoint q, int k, ANNidxArray nn_idx, ANNdistArray dd, double eps=0.0)
void annCoLaplacePts(ANNpointArray pa, int n, int dim, double correlation)
void annClusOrthFlats(ANNpointArray pa, int n, int dim, int n_clus, ANNbool new_clust, double std_dev, int max_dim)