annotate maths/Histogram.h @ 57:d241e7701c0c

* remove some debug output
author cannam
date Fri, 27 Feb 2009 13:07:22 +0000
parents 2e3f5d2d62c1
children
rev   line source
cannam@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@0 2
cannam@0 3 // Histogram.h: interface for the THistogram class.
cannam@0 4 //
cannam@0 5 //////////////////////////////////////////////////////////////////////
cannam@0 6
cannam@0 7
cannam@0 8 #ifndef HISTOGRAM_H
cannam@0 9 #define HISTOGRAM_H
cannam@0 10
cannam@0 11
cannam@0 12 #include <valarray>
cannam@0 13
cannam@0 14 /*! \brief A histogram class
cannam@0 15
cannam@0 16 This class computes the histogram of a vector.
cannam@0 17
cannam@0 18 \par Template parameters
cannam@0 19
cannam@0 20 - T type of input data (can be any: float, double, int, UINT, etc...)
cannam@0 21 - TOut type of output data: float or double. (default is double)
cannam@0 22
cannam@0 23 \par Moments:
cannam@0 24
cannam@0 25 The moments (average, standard deviation, skewness, etc.) are computed using
cannam@0 26 the algorithm of the Numerical recipies (see Numerical recipies in C, Chapter 14.1, pg 613).
cannam@0 27
cannam@0 28 \par Example:
cannam@0 29
cannam@0 30 This example shows the typical use of the class:
cannam@0 31 \code
cannam@0 32 // a vector containing the data
cannam@0 33 vector<float> data;
cannam@0 34 // Creating histogram using float data and with 101 containers,
cannam@0 35 THistogram<float> histo(101);
cannam@0 36 // computing the histogram
cannam@0 37 histo.compute(data);
cannam@0 38 \endcode
cannam@0 39
cannam@0 40 Once this is done, you can get a vector with the histogram or the normalized histogram (such that it's area is 1):
cannam@0 41 \code
cannam@0 42 // getting normalized histogram
cannam@0 43 vector<float> v=histo.getNormalizedHistogram();
cannam@0 44 \endcode
cannam@0 45
cannam@0 46 \par Reference
cannam@0 47
cannam@0 48 Equally spaced acsissa function integration (used in #GetArea): Numerical Recipies in C, Chapter 4.1, pg 130.
cannam@0 49
cannam@0 50 \author Jonathan de Halleux, dehalleux@auto.ucl.ac.be, 2002
cannam@0 51 */
cannam@0 52
cannam@0 53 template<class T, class TOut = double>
cannam@0 54 class THistogram
cannam@0 55 {
cannam@0 56 public:
cannam@0 57 //! \name Constructors
cannam@0 58 //@{
cannam@0 59 /*! Default constructor
cannam@0 60 \param counters the number of histogram containers (default value is 10)
cannam@0 61 */
cannam@0 62 THistogram(unsigned int counters = 10);
cannam@0 63 virtual ~THistogram() { clear();};
cannam@0 64 //@}
cannam@0 65
cannam@0 66 //! \name Histogram computation, update
cannam@0 67 //@{
cannam@0 68 /*! Computes histogram of vector v
cannam@0 69 \param v vector to compute histogram
cannam@0 70 \param computeMinMax set to true if min/max of v have to be used to get the histogram limits
cannam@0 71
cannam@0 72 This function computes the histogram of v and stores it internally.
cannam@0 73 \sa Update, GeTHistogram
cannam@0 74 */
cannam@0 75 void compute( const std::vector<T>& v, bool computeMinMax = true);
cannam@0 76 //! Update histogram with the vector v
cannam@0 77 void update( const std::vector<T>& v);
cannam@0 78 //! Update histogram with t
cannam@0 79 void update( const T& t);
cannam@0 80 //@}
cannam@0 81
cannam@0 82 //! \name Resetting functions
cannam@0 83 //@{
cannam@0 84 //! Resize the histogram. Warning this function clear the histogram.
cannam@0 85 void resize( unsigned int counters );
cannam@0 86 //! Clears the histogram
cannam@0 87 void clear() { m_counters.clear();};
cannam@0 88 //@}
cannam@0 89
cannam@0 90 //! \name Setters
cannam@0 91 //@{
cannam@0 92 /*! This function sets the minimum of the histogram spectrum.
cannam@0 93 The spectrum is not recomputed, use it with care
cannam@0 94 */
cannam@0 95 void setMinSpectrum( const T& min ) { m_min = min; computeStep();};
cannam@0 96 /*! This function sets the minimum of the histogram spectrum.
cannam@0 97 The spectrum is not recomputed, use it with care
cannam@0 98 */
cannam@0 99 void setMaxSpectrum( const T& max ) { m_max = max; computeStep();};
cannam@0 100 //@}
cannam@0 101 //! \name Getters
cannam@0 102 //@{
cannam@0 103 //! return minimum of histogram spectrum
cannam@0 104 const T& getMinSpectrum() const { return m_min;};
cannam@0 105 //! return maximum of histogram spectrum
cannam@0 106 const T& getMaxSpectrum() const { return m_max;};
cannam@0 107 //! return step size of histogram containers
cannam@0 108 TOut getStep() const { return m_step;};
cannam@0 109 //! return number of points in histogram
cannam@0 110 unsigned int getSum() const;
cannam@0 111 /*! \brief returns area under the histogram
cannam@0 112
cannam@0 113 The Simpson rule is used to integrate the histogram.
cannam@0 114 */
cannam@0 115 TOut getArea() const;
cannam@0 116
cannam@0 117 /*! \brief Computes the moments of the histogram
cannam@0 118
cannam@0 119 \param data dataset
cannam@0 120 \param ave mean
cannam@0 121 \f[ \bar x = \frac{1}{N} \sum_{j=1}^N x_j\f]
cannam@0 122 \param adev mean absolute deviation
cannam@0 123 \f[ adev(X) = \frac{1}{N} \sum_{j=1}^N | x_j - \bar x |\f]
cannam@0 124 \param var average deviation:
cannam@0 125 \f[ \mbox{Var}(X) = \frac{1}{N-1} \sum_{j=1}^N (x_j - \bar x)^2\f]
cannam@0 126 \param sdev standard deviation:
cannam@0 127 \f[ \sigma(X) = \sqrt{var(\bar x) }\f]
cannam@0 128 \param skew skewness
cannam@0 129 \f[ \mbox{Skew}(X) = \frac{1}{N}\sum_{j=1}^N \left[ \frac{x_j - \bar x}{\sigma}\right]^3\f]
cannam@0 130 \param kurt kurtosis
cannam@0 131 \f[ \mbox{Kurt}(X) = \left\{ \frac{1}{N}\sum_{j=1}^N \left[ \frac{x_j - \bar x}{\sigma}\right]^4 \right\} - 3\f]
cannam@0 132
cannam@0 133 */
cannam@0 134 static void getMoments(const std::vector<T>& data, TOut& ave, TOut& adev, TOut& sdev, TOut& var, TOut& skew, TOut& kurt);
cannam@0 135
cannam@0 136 //! return number of containers
cannam@0 137 unsigned int getSize() const { return m_counters.size();};
cannam@0 138 //! returns i-th counter
cannam@4 139 unsigned int operator [] (unsigned int i) const {
cannam@4 140 // ASSERT( i < m_counters.size() );
cannam@4 141 return m_counters[i];
cannam@4 142 };
cannam@0 143 //! return the computed histogram
cannam@0 144 const std::vector<unsigned int>& geTHistogram() const { return m_counters;};
cannam@0 145 //! return the computed histogram, in TOuts
cannam@0 146 std::vector<TOut> geTHistogramD() const;
cannam@0 147 /*! return the normalized computed histogram
cannam@0 148
cannam@0 149 \return the histogram such that the area is equal to 1
cannam@0 150 */
cannam@0 151 std::vector<TOut> getNormalizedHistogram() const;
cannam@0 152 //! returns left containers position
cannam@0 153 std::vector<TOut> getLeftContainers() const;
cannam@0 154 //! returns center containers position
cannam@0 155 std::vector<TOut> getCenterContainers() const;
cannam@0 156 //@}
cannam@0 157 protected:
cannam@0 158 //! Computes the step
cannam@0 159 void computeStep() { m_step = (TOut)(((TOut)(m_max-m_min)) / (m_counters.size()-1));};
cannam@0 160 //! Data accumulators
cannam@0 161 std::vector<unsigned int> m_counters;
cannam@0 162 //! minimum of dataset
cannam@0 163 T m_min;
cannam@0 164 //! maximum of dataset
cannam@0 165 T m_max;
cannam@0 166 //! width of container
cannam@0 167 TOut m_step;
cannam@0 168 };
cannam@0 169
cannam@0 170 template<class T, class TOut>
cannam@0 171 THistogram<T,TOut>::THistogram(unsigned int counters)
cannam@0 172 : m_counters(counters,0), m_min(0), m_max(0), m_step(0)
cannam@0 173 {
cannam@0 174
cannam@0 175 }
cannam@0 176
cannam@0 177 template<class T, class TOut>
cannam@0 178 void THistogram<T,TOut>::resize( unsigned int counters )
cannam@0 179 {
cannam@0 180 clear();
cannam@0 181
cannam@0 182 m_counters.resize(counters,0);
cannam@0 183
cannam@0 184 computeStep();
cannam@0 185 }
cannam@0 186
cannam@0 187 template<class T, class TOut>
cannam@0 188 void THistogram<T,TOut>::compute( const std::vector<T>& v, bool computeMinMax)
cannam@0 189 {
cannam@0 190 using namespace std;
cannam@0 191 unsigned int i;
cannam@0 192 int index;
cannam@0 193
cannam@0 194 if (m_counters.empty())
cannam@0 195 return;
cannam@0 196
cannam@0 197 if (computeMinMax)
cannam@0 198 {
cannam@0 199 m_max = m_min = v[0];
cannam@0 200 for (i=1;i<v.size();i++)
cannam@0 201 {
cannam@0 202 m_max = std::max( m_max, v[i]);
cannam@0 203 m_min = std::min( m_min, v[i]);
cannam@0 204 }
cannam@0 205 }
cannam@0 206
cannam@0 207 computeStep();
cannam@0 208
cannam@0 209 for (i = 0;i < v.size() ; i++)
cannam@0 210 {
cannam@0 211 index=(int) floor( ((TOut)(v[i]-m_min))/m_step ) ;
cannam@0 212
cannam@0 213 if (index >= m_counters.size() || index < 0)
cannam@0 214 return;
cannam@0 215
cannam@0 216 m_counters[index]++;
cannam@0 217 }
cannam@0 218 }
cannam@0 219
cannam@0 220 template<class T, class TOut>
cannam@0 221 void THistogram<T,TOut>::update( const std::vector<T>& v)
cannam@0 222 {
cannam@0 223 if (m_counters.empty())
cannam@0 224 return;
cannam@0 225
cannam@0 226 computeStep();
cannam@0 227
cannam@0 228 TOut size = m_counters.size();
cannam@0 229
cannam@0 230 int index;
cannam@0 231 for (unsigned int i = 0;i < size ; i++)
cannam@0 232 {
cannam@0 233 index = (int)floor(((TOut)(v[i]-m_min))/m_step);
cannam@0 234
cannam@0 235 if (index >= m_counters.size() || index < 0)
cannam@0 236 return;
cannam@0 237
cannam@0 238 m_counters[index]++;
cannam@0 239 }
cannam@0 240 }
cannam@0 241
cannam@0 242 template<class T, class TOut>
cannam@0 243 void THistogram<T,TOut>::update( const T& t)
cannam@0 244 {
cannam@0 245 int index=(int) floor( ((TOut)(t-m_min))/m_step ) ;
cannam@0 246
cannam@0 247 if (index >= m_counters.size() || index < 0)
cannam@0 248 return;
cannam@0 249
cannam@0 250 m_counters[index]++;
cannam@0 251 };
cannam@0 252
cannam@0 253 template<class T, class TOut>
cannam@0 254 std::vector<TOut> THistogram<T,TOut>::geTHistogramD() const
cannam@0 255 {
cannam@0 256 std::vector<TOut> v(m_counters.size());
cannam@0 257 for (unsigned int i = 0;i<m_counters.size(); i++)
cannam@0 258 v[i]=(TOut)m_counters[i];
cannam@0 259
cannam@0 260 return v;
cannam@0 261 }
cannam@0 262
cannam@0 263 template <class T, class TOut>
cannam@0 264 std::vector<TOut> THistogram<T,TOut>::getLeftContainers() const
cannam@0 265 {
cannam@0 266 std::vector<TOut> x( m_counters.size());
cannam@0 267
cannam@0 268 for (unsigned int i = 0;i<m_counters.size(); i++)
cannam@0 269 x[i]= m_min + i*m_step;
cannam@0 270
cannam@0 271 return x;
cannam@0 272 }
cannam@0 273
cannam@0 274 template <class T, class TOut>
cannam@0 275 std::vector<TOut> THistogram<T,TOut>::getCenterContainers() const
cannam@0 276 {
cannam@0 277 std::vector<TOut> x( m_counters.size());
cannam@0 278
cannam@0 279 for (unsigned int i = 0;i<m_counters.size(); i++)
cannam@0 280 x[i]= m_min + (i+0.5)*m_step;
cannam@0 281
cannam@0 282 return x;
cannam@0 283 }
cannam@0 284
cannam@0 285 template <class T, class TOut>
cannam@0 286 unsigned int THistogram<T,TOut>::getSum() const
cannam@0 287 {
cannam@0 288 unsigned int sum = 0;
cannam@0 289 for (unsigned int i = 0;i<m_counters.size(); i++)
cannam@0 290 sum+=m_counters[i];
cannam@0 291
cannam@0 292 return sum;
cannam@0 293 }
cannam@0 294
cannam@0 295 template <class T, class TOut>
cannam@0 296 TOut THistogram<T,TOut>::getArea() const
cannam@0 297 {
cannam@0 298 const size_t n=m_counters.size();
cannam@0 299 TOut area=0;
cannam@0 300
cannam@0 301 if (n>6)
cannam@0 302 {
cannam@0 303 area=3.0/8.0*(m_counters[0]+m_counters[n-1])
cannam@0 304 +7.0/6.0*(m_counters[1]+m_counters[n-2])
cannam@0 305 +23.0/24.0*(m_counters[2]+m_counters[n-3]);
cannam@0 306 for (unsigned int i=3;i<n-3;i++)
cannam@0 307 {
cannam@0 308 area+=m_counters[i];
cannam@0 309 }
cannam@0 310 }
cannam@0 311 else if (n>4)
cannam@0 312 {
cannam@0 313 area=5.0/12.0*(m_counters[0]+m_counters[n-1])
cannam@0 314 +13.0/12.0*(m_counters[1]+m_counters[n-2]);
cannam@0 315 for (unsigned int i=2;i<n-2;i++)
cannam@0 316 {
cannam@0 317 area+=m_counters[i];
cannam@0 318 }
cannam@0 319 }
cannam@0 320 else if (n>1)
cannam@0 321 {
cannam@0 322 area=1/2.0*(m_counters[0]+m_counters[n-1]);
cannam@0 323 for (unsigned int i=1;i<n-1;i++)
cannam@0 324 {
cannam@0 325 area+=m_counters[i];
cannam@0 326 }
cannam@0 327 }
cannam@0 328 else
cannam@0 329 area=0;
cannam@0 330
cannam@0 331 return area*m_step;
cannam@0 332 }
cannam@0 333
cannam@0 334 template <class T, class TOut>
cannam@0 335 std::vector<TOut> THistogram<T,TOut>::getNormalizedHistogram() const
cannam@0 336 {
cannam@0 337 std::vector<TOut> normCounters( m_counters.size());
cannam@0 338 TOut area = (TOut)getArea();
cannam@0 339
cannam@0 340 for (unsigned int i = 0;i<m_counters.size(); i++)
cannam@0 341 {
cannam@0 342 normCounters[i]= (TOut)m_counters[i]/area;
cannam@0 343 }
cannam@0 344
cannam@0 345 return normCounters;
cannam@0 346 };
cannam@0 347
cannam@0 348 template <class T, class TOut>
cannam@0 349 void THistogram<T,TOut>::getMoments(const std::vector<T>& data, TOut& ave, TOut& adev, TOut& sdev, TOut& var, TOut& skew, TOut& kurt)
cannam@0 350 {
cannam@0 351 int j;
cannam@0 352 double ep=0.0,s,p;
cannam@0 353 const size_t n = data.size();
cannam@0 354
cannam@0 355 if (n <= 1)
cannam@0 356 // nrerror("n must be at least 2 in moment");
cannam@0 357 return;
cannam@0 358
cannam@0 359 s=0.0; // First pass to get the mean.
cannam@0 360 for (j=0;j<n;j++)
cannam@0 361 s += data[j];
cannam@0 362
cannam@0 363 ave=s/(n);
cannam@0 364 adev=var=skew=kurt=0.0;
cannam@0 365 /* Second pass to get the first (absolute), second,
cannam@0 366 third, and fourth moments of the
cannam@0 367 deviation from the mean. */
cannam@0 368
cannam@0 369 for (j=0;j<n;j++)
cannam@0 370 {
cannam@0 371 adev += fabs(s=data[j]-(ave));
cannam@0 372 ep += s;
cannam@0 373 var += (p=s*s);
cannam@0 374 skew += (p *= s);
cannam@0 375 kurt += (p *= s);
cannam@0 376 }
cannam@0 377
cannam@0 378
cannam@0 379 adev /= n;
cannam@0 380 var=(var-ep*ep/n)/(n-1); // Corrected two-pass formula.
cannam@0 381 sdev=sqrt(var); // Put the pieces together according to the conventional definitions.
cannam@0 382 if (var)
cannam@0 383 {
cannam@0 384 skew /= (n*(var)*(sdev));
cannam@0 385 kurt=(kurt)/(n*(var)*(var))-3.0;
cannam@0 386 }
cannam@0 387 else
cannam@0 388 //nrerror("No skew/kurtosis when variance = 0 (in moment)");
cannam@0 389 return;
cannam@0 390 }
cannam@0 391
cannam@0 392 #endif
cannam@0 393