queso-0.53.0
InfoTheory.C
Go to the documentation of this file.
1 //-----------------------------------------------------------------------bl-
2 //--------------------------------------------------------------------------
3 //
4 // QUESO - a library to support the Quantification of Uncertainty
5 // for Estimation, Simulation and Optimization
6 //
7 // Copyright (C) 2008-2015 The PECOS Development Team
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the Version 2.1 GNU Lesser General
11 // Public License as published by the Free Software Foundation.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc. 51 Franklin Street, Fifth Floor,
21 // Boston, MA 02110-1301 USA
22 //
23 //-----------------------------------------------------------------------el-
24 
25 #include <queso/InfoTheory.h>
26 
27 #ifdef QUESO_HAS_ANN
28 
29 namespace QUESO {
30 
31 //*****************************************************
32 // Function: distANN_XY
33 //*****************************************************
34 void distANN_XY( const ANNpointArray dataX, const ANNpointArray dataY,
35  double* distsXY,
36  unsigned int dimX, unsigned int dimY,
37  unsigned int xN, unsigned int yN,
38  unsigned int k, double eps )
39 {
40 
41  ANNkd_tree* kdTree;
42  ANNdistArray nnDist;
43  ANNidxArray nnIdx;
44 
45  // Allocate memory
46  nnIdx = new ANNidx[ k+1 ];
47  nnDist = new ANNdist[ k+1 ];
48  kdTree = new ANNkd_tree( dataY, yN, dimY );
49 
50  // Get the distances to all the points
51  for( unsigned int i = 0; i < xN ; i++ )
52  {
53  kdTree->annkSearch( dataX[ i ], k+1, nnIdx, nnDist, eps );
54 
55  double my_dist = nnDist[ k ];
56 
57  // check to see if the dist is zero (query point same as the kNN)
58  // if so find the next k that gives the next positive distance
59  if( my_dist == 0.0 )
60  {
61  ANNdistArray nnDist_tmp = new ANNdist[ yN ];
62  ANNidxArray nnIdx_tmp = new ANNidx[ yN ];
63  kdTree->annkSearch( dataX[ i ], yN, nnIdx_tmp, nnDist_tmp, eps );
64 
65  for( unsigned int my_k = k + 1; my_k < yN; ++my_k )
66  if( nnDist_tmp[ my_k ] > 0.0 )
67  {
68  my_dist = nnDist_tmp[ my_k ];
69  break;
70  }
71  delete [] nnIdx_tmp;
72  delete [] nnDist_tmp;
73  }
74 
75  distsXY[ i ] = my_dist;
76  }
77 
78  // Deallocate memory
79  delete [] nnIdx;
80  delete [] nnDist;
81  delete kdTree;
82  annClose();
83 
84  return;
85 }
86 
87 //*****************************************************
88 // Function: normalizeANN_XY
89 // (used by Mutual Information - marginal normalization
90 // it does not destroy the correlations)
91 //*****************************************************
92 void normalizeANN_XY( ANNpointArray dataXY, unsigned int dimXY,
93  ANNpointArray dataX, unsigned int dimX,
94  ANNpointArray dataY, unsigned int dimY,
95  unsigned int N )
96 {
97 
98  ANNpoint meanXY, stdXY;
99 
100  meanXY = annAllocPt(dimXY); // is init with 0
101  stdXY = annAllocPt(dimXY); // is init with 0
102 
103  // get mean
104  for( unsigned int i = 0; i < N; i++ ) {
105  for( unsigned int j = 0; j < dimX; j++ ) {
106  meanXY[ j ] += dataXY[ i ][ j ];
107  }
108  for( unsigned int j = 0; j < dimY; j++ ) {
109  meanXY[ dimX + j ] += dataXY[ i ][ dimX + j ];
110  }
111  }
112  for( unsigned int j = 0; j < dimXY; j++ ) {
113  meanXY[ j ] = meanXY[ j ] / (double)N;
114  }
115 
116  // get standard deviation
117  for( unsigned int i = 0; i < N; i++ ) {
118  for( unsigned int j = 0; j < dimXY; j++ ) {
119  stdXY[ j ] += pow( dataXY[ i ][ j ] - meanXY[ j ], 2.0 );
120  }
121  }
122  for( unsigned int j = 0; j < dimXY; j++ ) {
123  stdXY[ j ] = sqrt( stdXY[ j ] / ((double)N-1.0) );
124  }
125 
126  /*
127  std::cout << "Mean RV - ";
128  annPrintPt( meanXY, dimXY, std::cout );
129  std::cout << std::endl << "Std RV - ";
130  annPrintPt( stdXY, dimXY, std::cout );
131  std::cout << std::endl;
132  */
133 
134  // get normalized points and populate marginals
135  for( unsigned int i = 0; i < N; i++ ) {
136  // normalize joint
137  for( unsigned int j = 0; j < dimXY; j++ ) {
138  dataXY[ i ][ j ] = ( dataXY[ i ][ j ] - meanXY[ j ] ) / stdXY[ j ];
139  }
140 
141  // populate marginals
142  for( unsigned int j = 0; j < dimX; j++ ) {
143  dataX[ i ][ j ] = dataXY[ i ][ j ];
144  }
145  for( unsigned int j = 0; j < dimY; j++ ) {
146  dataY[ i ][ j ] = dataXY[ i ][ dimX + j ];
147  }
148  }
149 
150 }
151 
152 //*****************************************************
153 // Function: computeMI_ANN
154 //*****************************************************
155 double computeMI_ANN( ANNpointArray dataXY,
156  unsigned int dimX, unsigned int dimY,
157  unsigned int k, unsigned int N, double eps )
158 {
159 
160  ANNpointArray dataX, dataY;
161  double* distsXY;
162  double MI_est;
163  ANNkd_tree* kdTreeX;
164  ANNkd_tree* kdTreeY;
165 
166  unsigned int dimXY = dimX + dimY;
167 
168  // Allocate memory
169  dataX = annAllocPts(N,dimX);
170  dataY = annAllocPts(N,dimY);
171  distsXY = new double[N];
172 
173  // Normalize data and populate the marginals dataX, dataY
174  normalizeANN_XY( dataXY, dimXY, dataX, dimX, dataY, dimY, N);
175 
176  // Get distance to knn for each point
177  kdTreeX = new ANNkd_tree( dataX, N, dimX );
178  kdTreeY = new ANNkd_tree( dataY, N, dimY );
179  distANN_XY( dataXY, dataXY, distsXY, dimXY, dimXY, N, N, k, eps );
180 
181  // Compute mutual information
182  double marginal_contrib = 0.0;
183  for( unsigned int i = 0; i < N; i++ ) {
184  // get the number of points within a specified radius
185  int no_pts_X = kdTreeX->annkFRSearch( dataX[ i ], distsXY[ i ], 0, NULL, NULL, eps);
186  int no_pts_Y = kdTreeY->annkFRSearch( dataY[ i ], distsXY[ i ], 0, NULL, NULL, eps);
187  // digamma evaluations
188  marginal_contrib += gsl_sf_psi_int( no_pts_X+1 ) + gsl_sf_psi_int( no_pts_Y+1 );
189  }
190  MI_est = gsl_sf_psi_int( k ) + gsl_sf_psi_int( N ) - marginal_contrib / (double)N;
191 
192  // Deallocate memory
193  delete kdTreeX;
194  delete kdTreeY;
195  delete [] distsXY;
196  annDeallocPts( dataX );
197  annDeallocPts( dataY );
198 
199  return MI_est;
200 
201 }
202 
203 //*****************************************************
204 // Function: estimateMI_ANN (using a joint)
205 // (Mutual Information)
206 //*****************************************************
207 template<template <class P_V, class P_M> class RV, class P_V, class P_M>
208 double estimateMI_ANN( const RV<P_V,P_M>& jointRV,
209  const unsigned int xDimSel[], unsigned int dimX,
210  const unsigned int yDimSel[], unsigned int dimY,
211  unsigned int k, unsigned int N, double eps )
212 {
213  ANNpointArray dataXY;
214  double MI_est;
215 
216  unsigned int dimXY = dimX + dimY;
217 
218  // Allocate memory
219  dataXY = annAllocPts(N,dimXY);
220 
221  // Copy samples in ANN data structure
222  P_V smpRV( jointRV.imageSet().vectorSpace().zeroVector() );
223  for( unsigned int i = 0; i < N; i++ ) {
224  // get a sample from the distribution
225  jointRV.realizer().realization( smpRV );
226 
227  // copy the vector values in the ANN data structure
228  for( unsigned int j = 0; j < dimX; j++ ) {
229  dataXY[ i ][ j ] = smpRV[ xDimSel[j] ];
230  }
231  for( unsigned int j = 0; j < dimY; j++ ) {
232  dataXY[ i ][ dimX + j ] = smpRV[ yDimSel[j] ];
233  }
234  // annPrintPt( dataXY[i], dimXY, std::cout ); std::cout << std::endl;
235  }
236 
237  MI_est = computeMI_ANN( dataXY,
238  dimX, dimY,
239  k, N, eps );
240 
241  // Deallocate memory
242  annDeallocPts( dataXY );
243 
244  return MI_est;
245 }
246 
247 //*****************************************************
248 // Function: estimateMI_ANN (using two seperate RVs)
249 // (Mutual Information)
250 //*****************************************************
251 template<class P_V, class P_M,
252  template <class P_V, class P_M> class RV_1,
253  template <class P_V, class P_M> class RV_2>
254 double estimateMI_ANN( const RV_1<P_V,P_M>& xRV,
255  const RV_2<P_V,P_M>& yRV,
256  const unsigned int xDimSel[], unsigned int dimX,
257  const unsigned int yDimSel[], unsigned int dimY,
258  unsigned int k, unsigned int N, double eps )
259 {
260  ANNpointArray dataXY;
261  double MI_est;
262 
263  unsigned int dimXY = dimX + dimY;
264 
265  // Allocate memory
266  dataXY = annAllocPts(N,dimXY);
267 
268  // Copy samples in ANN data structure
269  P_V smpRV_x( xRV.imageSet().vectorSpace().zeroVector() );
270  P_V smpRV_y( yRV.imageSet().vectorSpace().zeroVector() );
271 
272  for( unsigned int i = 0; i < N; i++ ) {
273  // get a sample from the distribution
274  xRV.realizer().realization( smpRV_x );
275  yRV.realizer().realization( smpRV_y );
276 
277  // copy the vector values in the ANN data structure
278  for( unsigned int j = 0; j < dimX; j++ ) {
279  dataXY[ i ][ j ] = smpRV_x[ xDimSel[j] ];
280  }
281  for( unsigned int j = 0; j < dimY; j++ ) {
282  dataXY[ i ][ dimX + j ] = smpRV_y[ yDimSel[j] ];
283  }
284  // annPrintPt( dataXY[i], dimXY, std::cout ); std::cout << std::endl;
285  }
286 
287  MI_est = computeMI_ANN( dataXY,
288  dimX, dimY,
289  k, N, eps );
290 
291  // Deallocate memory
292  annDeallocPts( dataXY );
293 
294  return MI_est;
295 }
296 
297 //*****************************************************
298 // Function: estimateKL_ANN
299 // (Kullback-Leibler divergence)
300 //*****************************************************
301 template <class P_V, class P_M,
302  template <class P_V, class P_M> class RV_1,
303  template <class P_V, class P_M> class RV_2>
304 double estimateKL_ANN( RV_1<P_V,P_M>& xRV,
305  RV_2<P_V,P_M>& yRV,
306  unsigned int xDimSel[], unsigned int dimX,
307  unsigned int yDimSel[], unsigned int dimY,
308  unsigned int xN, unsigned int yN,
309  unsigned int k, double eps )
310 {
311  ANNpointArray dataX;
312  ANNpointArray dataY;
313  double* distsX;
314  double* distsXY;
315  double KL_est;
316 
317  // sanity check
318  if( dimX != dimY ) {
319  queso_error_msg("Error-KL: the dimensions should agree");
320  }
321 
322  // Allocate memory
323  dataX = annAllocPts( xN, dimX );
324  dataY = annAllocPts( yN, dimY );
325  distsX = new double[xN];
326  distsXY = new double[xN];
327 
328  // Copy X samples in ANN data structure
329  P_V xSmpRV( xRV.imageSet().vectorSpace().zeroVector() );
330  for( unsigned int i = 0; i < xN; i++ ) {
331  // get a sample from the distribution
332  xRV.realizer().realization( xSmpRV );
333  // copy the vector values in the ANN data structure
334  for( unsigned int j = 0; j < dimX; j++ ) {
335  dataX[ i ][ j ] = xSmpRV[ xDimSel[j] ];
336  }
337  }
338 
339  // Copy Y samples in ANN data structure
340  P_V ySmpRV( yRV.imageSet().vectorSpace().zeroVector() );
341  for( unsigned int i = 0; i < yN; i++ ) {
342  // get a sample from the distribution
343  yRV.realizer().realization( ySmpRV );
344  // copy the vector values in the ANN data structure
345  for( unsigned int j = 0; j < dimY; j++ ) {
346  dataY[ i ][ j ] = ySmpRV[ yDimSel[j] ];
347  }
348  }
349 
350  // Get distance to knn for each point
351  distANN_XY( dataX, dataX, distsX, dimX, dimX, xN, xN, k+1, eps ); // k+1 because the 1st nn is itself
352  distANN_XY( dataX, dataY, distsXY, dimX, dimY, xN, yN, k, eps );
353 
354  // Compute KL-divergence estimate
355  double sum_log_ratio = 0.0;
356  for( unsigned int i = 0; i < xN; i++ )
357  {
358  sum_log_ratio += log( distsXY[i] / distsX[i] );
359  }
360  KL_est = (double)dimX/(double)xN * sum_log_ratio + log( (double)yN / ((double)xN-1.0 ) );
361 
362  // Deallocate memory
363  annDeallocPts( dataX );
364  annDeallocPts( dataY );
365  delete [] distsX;
366  delete [] distsXY;
367 
368  return KL_est;
369 }
370 
371 
372 //*****************************************************
373 // Function: estimateCE_ANN
374 // (Cross Entropy)
375 //*****************************************************
376 template <class P_V, class P_M,
377  template <class P_V, class P_M> class RV_1,
378  template <class P_V, class P_M> class RV_2>
379 double estimateCE_ANN( RV_1<P_V,P_M>& xRV,
380  RV_2<P_V,P_M>& yRV,
381  unsigned int xDimSel[], unsigned int dimX,
382  unsigned int yDimSel[], unsigned int dimY,
383  unsigned int xN, unsigned int yN,
384  unsigned int k, double eps )
385 {
386  ANNpointArray dataX;
387  ANNpointArray dataY;
388  double* distsXY;
389  double CE_est;
390  ANNkd_tree* kdTree;
391 
392  // sanity check
393  if( dimX != dimY ) {
394  queso_error_msg("Error-CE: the dimensions should agree");
395  }
396 
397  // Allocate memory
398  dataX = annAllocPts( xN, dimX );
399  dataY = annAllocPts( yN, dimY );
400  distsXY = new double[xN];
401  kdTree = new ANNkd_tree( dataY, yN, dimY );
402 
403  // Copy X samples in ANN data structure
404  P_V xSmpRV( xRV.imageSet().vectorSpace().zeroVector() );
405  for( unsigned int i = 0; i < xN; i++ ) {
406  // get a sample from the distribution
407  xRV.realizer().realization( xSmpRV );
408  // copy the vector values in the ANN data structure
409  for( unsigned int j = 0; j < dimX; j++ ) {
410  dataX[ i ][ j ] = xSmpRV[ xDimSel[j] ];
411  }
412  }
413 
414  // Copy Y samples in ANN data structure
415  P_V ySmpRV( yRV.imageSet().vectorSpace().zeroVector() );
416  for( unsigned int i = 0; i < yN; i++ ) {
417  // get a sample from the distribution
418  yRV.realizer().realization( ySmpRV );
419  // copy the vector values in the ANN data structure
420  for( unsigned int j = 0; j < dimY; j++ ) {
421  dataY[ i ][ j ] = ySmpRV[ yDimSel[j] ];
422  }
423  }
424 
425  // Get distance to knn for each point
426  distANN_XY( dataX, dataY, distsXY, dimX, dimY, xN, yN, k, eps );
427  kdTree = new ANNkd_tree( dataY, yN, dimY );
428 
429  // Compute cross entropy estimate
430  double sum_log = 0.0;
431  for( unsigned int i = 0; i < xN; i++ )
432  {
433  sum_log += log( distsXY[i] );
434  }
435  CE_est = (double)dimX/(double)xN * sum_log + log( (double)yN ) - gsl_sf_psi_int( k );
436 
437  // Deallocate memory
438  annDeallocPts( dataX );
439  annDeallocPts( dataY );
440  delete [] distsXY;
441 
442  return CE_est;
443 }
444 
445 } // End namespace QUESO
446 
447 #endif // QUESO_HAS_ANN
int annkFRSearch(ANNpoint q, ANNdist sqRad, int k, ANNidxArray nn_idx=NULL, ANNdistArray dd=NULL, double eps=0.0)
void annkSearch(ANNpoint q, int k, ANNidxArray nn_idx, ANNdistArray dd, double eps=0.0)
Definition: kd_search.cpp:89
double eps
Definition: ann_sample.cpp:55
#define queso_error_msg(msg)
Definition: asserts.h:47
DLL_API void annDeallocPts(ANNpointArray &pa)
Definition: ANN.cpp:133
ANNdist * ANNdistArray
Definition: ANN.h:377
DLL_API void annClose()
Definition: kd_tree.cpp:221
DLL_API ANNpoint annAllocPt(int dim, ANNcoord c=0)
Definition: ANN.cpp:110
ANNpoint * ANNpointArray
Definition: ANN.h:376
int ANNidx
Definition: ANN.h:175
ANNcoord * ANNpoint
Definition: ANN.h:375
double ANNdist
Definition: ANN.h:159
DLL_API ANNpointArray annAllocPts(int n, int dim)
Definition: ANN.cpp:117
int k
Definition: ann_sample.cpp:53
ANNidx * ANNidxArray
Definition: ANN.h:378

Generated on Thu Jun 11 2015 13:52:32 for queso-0.53.0 by  doxygen 1.8.5