queso-0.57.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
InfoTheory_impl.h
Go to the documentation of this file.
1 //-----------------------------------------------------------------------bl-
2 //--------------------------------------------------------------------------
3 //
4 // QUESO - a library to support the Quantification of Uncertainty
5 // for Estimation, Simulation and Optimization
6 //
7 // Copyright (C) 2008-2017 The PECOS Development Team
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the Version 2.1 GNU Lesser General
11 // Public License as published by the Free Software Foundation.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc. 51 Franklin Street, Fifth Floor,
21 // Boston, MA 02110-1301 USA
22 //
23 //-----------------------------------------------------------------------el-
24 
25 #include <queso/Defines.h>
26 
27 #ifdef QUESO_HAS_ANN
28 #include <queso/GslVector.h>
29 #include <queso/GslMatrix.h>
30 #include <queso/GaussianVectorRV.h>
31 #include <queso/InfoTheory.h>
32 
33 #include <gsl/gsl_sf_psi.h> // todo: take specificity of gsl_, i.e., make it general (gsl or boost or etc)
34 
35 namespace QUESO {
36 
37 //*****************************************************
38 // Function: distANN_XY
39 //*****************************************************
40 void distANN_XY( const ANNpointArray dataX, const ANNpointArray dataY,
41  double* distsXY,
42  unsigned int dimX, unsigned int dimY,
43  unsigned int xN, unsigned int yN,
44  unsigned int k, double eps )
45 {
46 
47  ANNkd_tree* kdTree;
48  ANNdistArray nnDist;
49  ANNidxArray nnIdx;
50 
51  // Allocate memory
52  nnIdx = new ANNidx[ k+1 ];
53  nnDist = new ANNdist[ k+1 ];
54  kdTree = new ANNkd_tree( dataY, yN, dimY );
55 
56  // Get the distances to all the points
57  for( unsigned int i = 0; i < xN ; i++ )
58  {
59  kdTree->annkSearch( dataX[ i ], k+1, nnIdx, nnDist, eps );
60 
61  double my_dist = nnDist[ k ];
62 
63  // check to see if the dist is zero (query point same as the kNN)
64  // if so find the next k that gives the next positive distance
65  if( my_dist == 0.0 )
66  {
67  ANNdistArray nnDist_tmp = new ANNdist[ yN ];
68  ANNidxArray nnIdx_tmp = new ANNidx[ yN ];
69  kdTree->annkSearch( dataX[ i ], yN, nnIdx_tmp, nnDist_tmp, eps );
70 
71  for( unsigned int my_k = k + 1; my_k < yN; ++my_k )
72  if( nnDist_tmp[ my_k ] > 0.0 )
73  {
74  my_dist = nnDist_tmp[ my_k ];
75  break;
76  }
77  delete [] nnIdx_tmp;
78  delete [] nnDist_tmp;
79  }
80 
81  distsXY[ i ] = my_dist;
82  }
83 
84  // Deallocate memory
85  delete [] nnIdx;
86  delete [] nnDist;
87  delete kdTree;
88  annClose();
89 
90  return;
91 }
92 
93 //*****************************************************
94 // Function: normalizeANN_XY
95 // (used by Mutual Information - marginal normalization
96 // it does not destroy the correlations)
97 //*****************************************************
98 void normalizeANN_XY( ANNpointArray dataXY, unsigned int dimXY,
99  ANNpointArray dataX, unsigned int dimX,
100  ANNpointArray dataY, unsigned int dimY,
101  unsigned int N )
102 {
103 
104  ANNpoint meanXY, stdXY;
105 
106  meanXY = annAllocPt(dimXY); // is init with 0
107  stdXY = annAllocPt(dimXY); // is init with 0
108 
109  // get mean
110  for( unsigned int i = 0; i < N; i++ ) {
111  for( unsigned int j = 0; j < dimX; j++ ) {
112  meanXY[ j ] += dataXY[ i ][ j ];
113  }
114  for( unsigned int j = 0; j < dimY; j++ ) {
115  meanXY[ dimX + j ] += dataXY[ i ][ dimX + j ];
116  }
117  }
118  for( unsigned int j = 0; j < dimXY; j++ ) {
119  meanXY[ j ] = meanXY[ j ] / (double)N;
120  }
121 
122  // get standard deviation
123  for( unsigned int i = 0; i < N; i++ ) {
124  for( unsigned int j = 0; j < dimXY; j++ ) {
125  stdXY[ j ] += pow( dataXY[ i ][ j ] - meanXY[ j ], 2.0 );
126  }
127  }
128  for( unsigned int j = 0; j < dimXY; j++ ) {
129  stdXY[ j ] = sqrt( stdXY[ j ] / ((double)N-1.0) );
130  }
131 
132  /*
133  std::cout << "Mean RV - ";
134  annPrintPt( meanXY, dimXY, std::cout );
135  std::cout << std::endl << "Std RV - ";
136  annPrintPt( stdXY, dimXY, std::cout );
137  std::cout << std::endl;
138  */
139 
140  // get normalized points and populate marginals
141  for( unsigned int i = 0; i < N; i++ ) {
142  // normalize joint
143  for( unsigned int j = 0; j < dimXY; j++ ) {
144  dataXY[ i ][ j ] = ( dataXY[ i ][ j ] - meanXY[ j ] ) / stdXY[ j ];
145  }
146 
147  // populate marginals
148  for( unsigned int j = 0; j < dimX; j++ ) {
149  dataX[ i ][ j ] = dataXY[ i ][ j ];
150  }
151  for( unsigned int j = 0; j < dimY; j++ ) {
152  dataY[ i ][ j ] = dataXY[ i ][ dimX + j ];
153  }
154  }
155 
156 }
157 
158 //*****************************************************
159 // Function: computeMI_ANN
160 //*****************************************************
162  unsigned int dimX, unsigned int dimY,
163  unsigned int k, unsigned int N, double eps )
164 {
165 
166  ANNpointArray dataX, dataY;
167  double* distsXY;
168  double MI_est;
169  ANNkd_tree* kdTreeX;
170  ANNkd_tree* kdTreeY;
171 
172  unsigned int dimXY = dimX + dimY;
173 
174  // Allocate memory
175  dataX = annAllocPts(N,dimX);
176  dataY = annAllocPts(N,dimY);
177  distsXY = new double[N];
178 
179  // Normalize data and populate the marginals dataX, dataY
180  normalizeANN_XY( dataXY, dimXY, dataX, dimX, dataY, dimY, N);
181 
182  // Get distance to knn for each point
183  kdTreeX = new ANNkd_tree( dataX, N, dimX );
184  kdTreeY = new ANNkd_tree( dataY, N, dimY );
185  distANN_XY( dataXY, dataXY, distsXY, dimXY, dimXY, N, N, k, eps );
186 
187  // Compute mutual information
188  double marginal_contrib = 0.0;
189  for( unsigned int i = 0; i < N; i++ ) {
190  // get the number of points within a specified radius
191  int no_pts_X = kdTreeX->annkFRSearch( dataX[ i ], distsXY[ i ], 0, NULL, NULL, eps);
192  int no_pts_Y = kdTreeY->annkFRSearch( dataY[ i ], distsXY[ i ], 0, NULL, NULL, eps);
193  // digamma evaluations
194  marginal_contrib += gsl_sf_psi_int( no_pts_X+1 ) + gsl_sf_psi_int( no_pts_Y+1 );
195  }
196  MI_est = gsl_sf_psi_int( k ) + gsl_sf_psi_int( N ) - marginal_contrib / (double)N;
197 
198  // Deallocate memory
199  delete kdTreeX;
200  delete kdTreeY;
201  delete [] distsXY;
202  annDeallocPts( dataX );
203  annDeallocPts( dataY );
204 
205  return MI_est;
206 
207 }
208 
209 //*****************************************************
210 // Function: estimateMI_ANN (using a joint)
211 // (Mutual Information)
212 //*****************************************************
213 template<template <class P_V, class P_M> class RV, class P_V, class P_M>
214 double estimateMI_ANN( const RV<P_V,P_M>& jointRV,
215  const unsigned int xDimSel[], unsigned int dimX,
216  const unsigned int yDimSel[], unsigned int dimY,
217  unsigned int k, unsigned int N, double eps )
218 {
219  ANNpointArray dataXY;
220  double MI_est;
221 
222  unsigned int dimXY = dimX + dimY;
223 
224  // Allocate memory
225  dataXY = annAllocPts(N,dimXY);
226 
227  // Copy samples in ANN data structure
228  P_V smpRV( jointRV.imageSet().vectorSpace().zeroVector() );
229  for( unsigned int i = 0; i < N; i++ ) {
230  // get a sample from the distribution
231  jointRV.realizer().realization( smpRV );
232 
233  // copy the vector values in the ANN data structure
234  for( unsigned int j = 0; j < dimX; j++ ) {
235  dataXY[ i ][ j ] = smpRV[ xDimSel[j] ];
236  }
237  for( unsigned int j = 0; j < dimY; j++ ) {
238  dataXY[ i ][ dimX + j ] = smpRV[ yDimSel[j] ];
239  }
240  // annPrintPt( dataXY[i], dimXY, std::cout ); std::cout << std::endl;
241  }
242 
243  MI_est = computeMI_ANN( dataXY,
244  dimX, dimY,
245  k, N, eps );
246 
247  // Deallocate memory
248  annDeallocPts( dataXY );
249 
250  return MI_est;
251 }
252 
253 //*****************************************************
254 // Function: estimateMI_ANN (using two seperate RVs)
255 // (Mutual Information)
256 //*****************************************************
257 template<class P_V, class P_M,
258  template <class P_V, class P_M> class RV_1,
259  template <class P_V, class P_M> class RV_2>
260 double estimateMI_ANN( const RV_1<P_V,P_M>& xRV,
261  const RV_2<P_V,P_M>& yRV,
262  const unsigned int xDimSel[], unsigned int dimX,
263  const unsigned int yDimSel[], unsigned int dimY,
264  unsigned int k, unsigned int N, double eps )
265 {
266  ANNpointArray dataXY;
267  double MI_est;
268 
269  unsigned int dimXY = dimX + dimY;
270 
271  // Allocate memory
272  dataXY = annAllocPts(N,dimXY);
273 
274  // Copy samples in ANN data structure
275  P_V smpRV_x( xRV.imageSet().vectorSpace().zeroVector() );
276  P_V smpRV_y( yRV.imageSet().vectorSpace().zeroVector() );
277 
278  for( unsigned int i = 0; i < N; i++ ) {
279  // get a sample from the distribution
280  xRV.realizer().realization( smpRV_x );
281  yRV.realizer().realization( smpRV_y );
282 
283  // copy the vector values in the ANN data structure
284  for( unsigned int j = 0; j < dimX; j++ ) {
285  dataXY[ i ][ j ] = smpRV_x[ xDimSel[j] ];
286  }
287  for( unsigned int j = 0; j < dimY; j++ ) {
288  dataXY[ i ][ dimX + j ] = smpRV_y[ yDimSel[j] ];
289  }
290  // annPrintPt( dataXY[i], dimXY, std::cout ); std::cout << std::endl;
291  }
292 
293  MI_est = computeMI_ANN( dataXY,
294  dimX, dimY,
295  k, N, eps );
296 
297  // Deallocate memory
298  annDeallocPts( dataXY );
299 
300  return MI_est;
301 }
302 
303 //*****************************************************
304 // Function: estimateKL_ANN
305 // (Kullback-Leibler divergence)
306 //*****************************************************
307 template <class P_V, class P_M,
308  template <class P_V, class P_M> class RV_1,
309  template <class P_V, class P_M> class RV_2>
310 double estimateKL_ANN( RV_1<P_V,P_M>& xRV,
311  RV_2<P_V,P_M>& yRV,
312  unsigned int xDimSel[], unsigned int dimX,
313  unsigned int yDimSel[], unsigned int dimY,
314  unsigned int xN, unsigned int yN,
315  unsigned int k, double eps )
316 {
317  ANNpointArray dataX;
318  ANNpointArray dataY;
319  double* distsX;
320  double* distsXY;
321  double KL_est;
322 
323  // sanity check
324  if( dimX != dimY ) {
325  queso_error_msg("Error-KL: the dimensions should agree");
326  }
327 
328  // Allocate memory
329  dataX = annAllocPts( xN, dimX );
330  dataY = annAllocPts( yN, dimY );
331  distsX = new double[xN];
332  distsXY = new double[xN];
333 
334  // Copy X samples in ANN data structure
335  P_V xSmpRV( xRV.imageSet().vectorSpace().zeroVector() );
336  for( unsigned int i = 0; i < xN; i++ ) {
337  // get a sample from the distribution
338  xRV.realizer().realization( xSmpRV );
339  // copy the vector values in the ANN data structure
340  for( unsigned int j = 0; j < dimX; j++ ) {
341  dataX[ i ][ j ] = xSmpRV[ xDimSel[j] ];
342  }
343  }
344 
345  // Copy Y samples in ANN data structure
346  P_V ySmpRV( yRV.imageSet().vectorSpace().zeroVector() );
347  for( unsigned int i = 0; i < yN; i++ ) {
348  // get a sample from the distribution
349  yRV.realizer().realization( ySmpRV );
350  // copy the vector values in the ANN data structure
351  for( unsigned int j = 0; j < dimY; j++ ) {
352  dataY[ i ][ j ] = ySmpRV[ yDimSel[j] ];
353  }
354  }
355 
356  // Get distance to knn for each point
357  distANN_XY( dataX, dataX, distsX, dimX, dimX, xN, xN, k+1, eps ); // k+1 because the 1st nn is itself
358  distANN_XY( dataX, dataY, distsXY, dimX, dimY, xN, yN, k, eps );
359 
360  // Compute KL-divergence estimate
361  double sum_log_ratio = 0.0;
362  for( unsigned int i = 0; i < xN; i++ )
363  {
364  sum_log_ratio += log( distsXY[i] / distsX[i] );
365  }
366  KL_est = (double)dimX/(double)xN * sum_log_ratio + log( (double)yN / ((double)xN-1.0 ) );
367 
368  // Deallocate memory
369  annDeallocPts( dataX );
370  annDeallocPts( dataY );
371  delete [] distsX;
372  delete [] distsXY;
373 
374  return KL_est;
375 }
376 
377 
378 //*****************************************************
379 // Function: estimateCE_ANN
380 // (Cross Entropy)
381 //*****************************************************
382 template <class P_V, class P_M,
383  template <class P_V, class P_M> class RV_1,
384  template <class P_V, class P_M> class RV_2>
385 double estimateCE_ANN( RV_1<P_V,P_M>& xRV,
386  RV_2<P_V,P_M>& yRV,
387  unsigned int xDimSel[], unsigned int dimX,
388  unsigned int yDimSel[], unsigned int dimY,
389  unsigned int xN, unsigned int yN,
390  unsigned int k, double eps )
391 {
392  ANNpointArray dataX;
393  ANNpointArray dataY;
394  double* distsXY;
395  double CE_est;
396 
397  // sanity check
398  if( dimX != dimY ) {
399  queso_error_msg("Error-CE: the dimensions should agree");
400  }
401 
402  // Allocate memory
403  dataX = annAllocPts( xN, dimX );
404  dataY = annAllocPts( yN, dimY );
405  distsXY = new double[xN];
406 
407  // Copy X samples in ANN data structure
408  P_V xSmpRV( xRV.imageSet().vectorSpace().zeroVector() );
409  for( unsigned int i = 0; i < xN; i++ ) {
410  // get a sample from the distribution
411  xRV.realizer().realization( xSmpRV );
412  // copy the vector values in the ANN data structure
413  for( unsigned int j = 0; j < dimX; j++ ) {
414  dataX[ i ][ j ] = xSmpRV[ xDimSel[j] ];
415  }
416  }
417 
418  // Copy Y samples in ANN data structure
419  P_V ySmpRV( yRV.imageSet().vectorSpace().zeroVector() );
420  for( unsigned int i = 0; i < yN; i++ ) {
421  // get a sample from the distribution
422  yRV.realizer().realization( ySmpRV );
423  // copy the vector values in the ANN data structure
424  for( unsigned int j = 0; j < dimY; j++ ) {
425  dataY[ i ][ j ] = ySmpRV[ yDimSel[j] ];
426  }
427  }
428 
429  // Get distance to knn for each point
430  distANN_XY( dataX, dataY, distsXY, dimX, dimY, xN, yN, k, eps );
431 
432  // Compute cross entropy estimate
433  double sum_log = 0.0;
434  for( unsigned int i = 0; i < xN; i++ )
435  {
436  sum_log += log( distsXY[i] );
437  }
438  CE_est = (double)dimX/(double)xN * sum_log + log( (double)yN ) - gsl_sf_psi_int( k );
439 
440  // Deallocate memory
441  annDeallocPts( dataX );
442  annDeallocPts( dataY );
443  delete [] distsXY;
444 
445  return CE_est;
446 }
447 
448 } // End namespace QUESO
449 
450 #endif // QUESO_HAS_ANN
DLL_API void annClose()
Definition: kd_tree.cpp:221
int ANNidx
Definition: ANN.h:175
ANNpoint * ANNpointArray
Definition: ANN.h:376
int annkFRSearch(ANNpoint q, ANNdist sqRad, int k, ANNidxArray nn_idx=NULL, ANNdistArray dd=NULL, double eps=0.0)
double ANNdist
Definition: ANN.h:159
double estimateMI_ANN(const RV< P_V, P_M > &jointRV, const unsigned int xDimSel[], unsigned int dimX, const unsigned int yDimSel[], unsigned int dimY, unsigned int k, unsigned int N, double eps)
void annkSearch(ANNpoint q, int k, ANNidxArray nn_idx, ANNdistArray dd, double eps=0.0)
Definition: kd_search.cpp:89
ANNcoord * ANNpoint
Definition: ANN.h:375
double estimateCE_ANN(RV_1< P_V, P_M > &xRV, RV_2< P_V, P_M > &yRV, unsigned int xDimSel[], unsigned int dimX, unsigned int yDimSel[], unsigned int dimY, unsigned int xN, unsigned int yN, unsigned int k, double eps)
void distANN_XY(const ANNpointArray dataX, const ANNpointArray dataY, double *distsXY, unsigned int dimX, unsigned int dimY, unsigned int xN, unsigned int yN, unsigned int k, double eps)
DLL_API void annDeallocPts(ANNpointArray &pa)
Definition: ANN.cpp:133
DLL_API ANNpoint annAllocPt(int dim, ANNcoord c=0)
Definition: ANN.cpp:110
ANNdist * ANNdistArray
Definition: ANN.h:377
double estimateKL_ANN(RV_1< P_V, P_M > &xRV, RV_2< P_V, P_M > &yRV, unsigned int xDimSel[], unsigned int dimX, unsigned int yDimSel[], unsigned int dimY, unsigned int xN, unsigned int yN, unsigned int k, double eps)
void normalizeANN_XY(ANNpointArray dataXY, unsigned int dimXY, ANNpointArray dataX, unsigned int dimX, ANNpointArray dataY, unsigned int dimY, unsigned int N)
ANNidx * ANNidxArray
Definition: ANN.h:378
DLL_API ANNpointArray annAllocPts(int n, int dim)
Definition: ANN.cpp:117
double computeMI_ANN(ANNpointArray dataXY, unsigned int dimX, unsigned int dimY, unsigned int k, unsigned int N, double eps)

Generated on Tue Jun 5 2018 19:48:55 for queso-0.57.1 by  doxygen 1.8.5