annotate dsp/maths/Histogram.h @ 225:49844bc8a895

* Queen Mary C++ DSP library
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 05 Apr 2006 17:35:59 +0000
parents
children 14839f9a616e
rev   line source
c@225 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@225 2
c@225 3 // Histogram.h: interface for the THistogram class.
c@225 4 //
c@225 5 //////////////////////////////////////////////////////////////////////
c@225 6
c@225 7
c@225 8 #ifndef HISTOGRAM_H
c@225 9 #define HISTOGRAM_H
c@225 10
c@225 11
c@225 12 #include <valarray>
c@225 13
c@225 14 /*! \brief A histogram class
c@225 15
c@225 16 This class computes the histogram of a vector.
c@225 17
c@225 18 \par Template parameters
c@225 19
c@225 20 - T type of input data (can be any: float, double, int, UINT, etc...)
c@225 21 - TOut type of output data: float or double. (default is double)
c@225 22
c@225 23 \par Moments:
c@225 24
c@225 25 The moments (average, standard deviation, skewness, etc.) are computed using
c@225 26 the algorithm of the Numerical recipies (see Numerical recipies in C, Chapter 14.1, pg 613).
c@225 27
c@225 28 \par Example:
c@225 29
c@225 30 This example shows the typical use of the class:
c@225 31 \code
c@225 32 // a vector containing the data
c@225 33 vector<float> data;
c@225 34 // Creating histogram using float data and with 101 containers,
c@225 35 THistogram<float> histo(101);
c@225 36 // computing the histogram
c@225 37 histo.compute(data);
c@225 38 \endcode
c@225 39
c@225 40 Once this is done, you can get a vector with the histogram or the normalized histogram (such that it's area is 1):
c@225 41 \code
c@225 42 // getting normalized histogram
c@225 43 vector<float> v=histo.getNormalizedHistogram();
c@225 44 \endcode
c@225 45
c@225 46 \par Reference
c@225 47
c@225 48 Equally spaced acsissa function integration (used in #GetArea): Numerical Recipies in C, Chapter 4.1, pg 130.
c@225 49
c@225 50 \author Jonathan de Halleux, dehalleux@auto.ucl.ac.be, 2002
c@225 51 */
c@225 52
c@225 53 template<class T, class TOut = double>
c@225 54 class THistogram
c@225 55 {
c@225 56 public:
c@225 57 //! \name Constructors
c@225 58 //@{
c@225 59 /*! Default constructor
c@225 60 \param counters the number of histogram containers (default value is 10)
c@225 61 */
c@225 62 THistogram(unsigned int counters = 10);
c@225 63 virtual ~THistogram() { clear();};
c@225 64 //@}
c@225 65
c@225 66 //! \name Histogram computation, update
c@225 67 //@{
c@225 68 /*! Computes histogram of vector v
c@225 69 \param v vector to compute histogram
c@225 70 \param computeMinMax set to true if min/max of v have to be used to get the histogram limits
c@225 71
c@225 72 This function computes the histogram of v and stores it internally.
c@225 73 \sa Update, GeTHistogram
c@225 74 */
c@225 75 void compute( const std::vector<T>& v, bool computeMinMax = true);
c@225 76 //! Update histogram with the vector v
c@225 77 void update( const std::vector<T>& v);
c@225 78 //! Update histogram with t
c@225 79 void update( const T& t);
c@225 80 //@}
c@225 81
c@225 82 //! \name Resetting functions
c@225 83 //@{
c@225 84 //! Resize the histogram. Warning this function clear the histogram.
c@225 85 void resize( unsigned int counters );
c@225 86 //! Clears the histogram
c@225 87 void clear() { m_counters.clear();};
c@225 88 //@}
c@225 89
c@225 90 //! \name Setters
c@225 91 //@{
c@225 92 /*! This function sets the minimum of the histogram spectrum.
c@225 93 The spectrum is not recomputed, use it with care
c@225 94 */
c@225 95 void setMinSpectrum( const T& min ) { m_min = min; computeStep();};
c@225 96 /*! This function sets the minimum of the histogram spectrum.
c@225 97 The spectrum is not recomputed, use it with care
c@225 98 */
c@225 99 void setMaxSpectrum( const T& max ) { m_max = max; computeStep();};
c@225 100 //@}
c@225 101 //! \name Getters
c@225 102 //@{
c@225 103 //! return minimum of histogram spectrum
c@225 104 const T& getMinSpectrum() const { return m_min;};
c@225 105 //! return maximum of histogram spectrum
c@225 106 const T& getMaxSpectrum() const { return m_max;};
c@225 107 //! return step size of histogram containers
c@225 108 TOut getStep() const { return m_step;};
c@225 109 //! return number of points in histogram
c@225 110 unsigned int getSum() const;
c@225 111 /*! \brief returns area under the histogram
c@225 112
c@225 113 The Simpson rule is used to integrate the histogram.
c@225 114 */
c@225 115 TOut getArea() const;
c@225 116
c@225 117 /*! \brief Computes the moments of the histogram
c@225 118
c@225 119 \param data dataset
c@225 120 \param ave mean
c@225 121 \f[ \bar x = \frac{1}{N} \sum_{j=1}^N x_j\f]
c@225 122 \param adev mean absolute deviation
c@225 123 \f[ adev(X) = \frac{1}{N} \sum_{j=1}^N | x_j - \bar x |\f]
c@225 124 \param var average deviation:
c@225 125 \f[ \mbox{Var}(X) = \frac{1}{N-1} \sum_{j=1}^N (x_j - \bar x)^2\f]
c@225 126 \param sdev standard deviation:
c@225 127 \f[ \sigma(X) = \sqrt{var(\bar x) }\f]
c@225 128 \param skew skewness
c@225 129 \f[ \mbox{Skew}(X) = \frac{1}{N}\sum_{j=1}^N \left[ \frac{x_j - \bar x}{\sigma}\right]^3\f]
c@225 130 \param kurt kurtosis
c@225 131 \f[ \mbox{Kurt}(X) = \left\{ \frac{1}{N}\sum_{j=1}^N \left[ \frac{x_j - \bar x}{\sigma}\right]^4 \right\} - 3\f]
c@225 132
c@225 133 */
c@225 134 static void getMoments(const std::vector<T>& data, TOut& ave, TOut& adev, TOut& sdev, TOut& var, TOut& skew, TOut& kurt);
c@225 135
c@225 136 //! return number of containers
c@225 137 unsigned int getSize() const { return m_counters.size();};
c@225 138 //! returns i-th counter
c@225 139 unsigned int operator [] (unsigned int i) const { ASSERT( i < m_counters.size() ); return m_counters[i];};
c@225 140 //! return the computed histogram
c@225 141 const std::vector<unsigned int>& geTHistogram() const { return m_counters;};
c@225 142 //! return the computed histogram, in TOuts
c@225 143 std::vector<TOut> geTHistogramD() const;
c@225 144 /*! return the normalized computed histogram
c@225 145
c@225 146 \return the histogram such that the area is equal to 1
c@225 147 */
c@225 148 std::vector<TOut> getNormalizedHistogram() const;
c@225 149 //! returns left containers position
c@225 150 std::vector<TOut> getLeftContainers() const;
c@225 151 //! returns center containers position
c@225 152 std::vector<TOut> getCenterContainers() const;
c@225 153 //@}
c@225 154 protected:
c@225 155 //! Computes the step
c@225 156 void computeStep() { m_step = (TOut)(((TOut)(m_max-m_min)) / (m_counters.size()-1));};
c@225 157 //! Data accumulators
c@225 158 std::vector<unsigned int> m_counters;
c@225 159 //! minimum of dataset
c@225 160 T m_min;
c@225 161 //! maximum of dataset
c@225 162 T m_max;
c@225 163 //! width of container
c@225 164 TOut m_step;
c@225 165 };
c@225 166
c@225 167 template<class T, class TOut>
c@225 168 THistogram<T,TOut>::THistogram(unsigned int counters)
c@225 169 : m_counters(counters,0), m_min(0), m_max(0), m_step(0)
c@225 170 {
c@225 171
c@225 172 }
c@225 173
c@225 174 template<class T, class TOut>
c@225 175 void THistogram<T,TOut>::resize( unsigned int counters )
c@225 176 {
c@225 177 clear();
c@225 178
c@225 179 m_counters.resize(counters,0);
c@225 180
c@225 181 computeStep();
c@225 182 }
c@225 183
c@225 184 template<class T, class TOut>
c@225 185 void THistogram<T,TOut>::compute( const std::vector<T>& v, bool computeMinMax)
c@225 186 {
c@225 187 using namespace std;
c@225 188 unsigned int i;
c@225 189 int index;
c@225 190
c@225 191 if (m_counters.empty())
c@225 192 return;
c@225 193
c@225 194 if (computeMinMax)
c@225 195 {
c@225 196 m_max = m_min = v[0];
c@225 197 for (i=1;i<v.size();i++)
c@225 198 {
c@225 199 m_max = std::max( m_max, v[i]);
c@225 200 m_min = std::min( m_min, v[i]);
c@225 201 }
c@225 202 }
c@225 203
c@225 204 computeStep();
c@225 205
c@225 206 for (i = 0;i < v.size() ; i++)
c@225 207 {
c@225 208 index=(int) floor( ((TOut)(v[i]-m_min))/m_step ) ;
c@225 209
c@225 210 if (index >= m_counters.size() || index < 0)
c@225 211 return;
c@225 212
c@225 213 m_counters[index]++;
c@225 214 }
c@225 215 }
c@225 216
c@225 217 template<class T, class TOut>
c@225 218 void THistogram<T,TOut>::update( const std::vector<T>& v)
c@225 219 {
c@225 220 if (m_counters.empty())
c@225 221 return;
c@225 222
c@225 223 computeStep();
c@225 224
c@225 225 TOut size = m_counters.size();
c@225 226
c@225 227 int index;
c@225 228 for (unsigned int i = 0;i < size ; i++)
c@225 229 {
c@225 230 index = (int)floor(((TOut)(v[i]-m_min))/m_step);
c@225 231
c@225 232 if (index >= m_counters.size() || index < 0)
c@225 233 return;
c@225 234
c@225 235 m_counters[index]++;
c@225 236 }
c@225 237 }
c@225 238
c@225 239 template<class T, class TOut>
c@225 240 void THistogram<T,TOut>::update( const T& t)
c@225 241 {
c@225 242 int index=(int) floor( ((TOut)(t-m_min))/m_step ) ;
c@225 243
c@225 244 if (index >= m_counters.size() || index < 0)
c@225 245 return;
c@225 246
c@225 247 m_counters[index]++;
c@225 248 };
c@225 249
c@225 250 template<class T, class TOut>
c@225 251 std::vector<TOut> THistogram<T,TOut>::geTHistogramD() const
c@225 252 {
c@225 253 std::vector<TOut> v(m_counters.size());
c@225 254 for (unsigned int i = 0;i<m_counters.size(); i++)
c@225 255 v[i]=(TOut)m_counters[i];
c@225 256
c@225 257 return v;
c@225 258 }
c@225 259
c@225 260 template <class T, class TOut>
c@225 261 std::vector<TOut> THistogram<T,TOut>::getLeftContainers() const
c@225 262 {
c@225 263 std::vector<TOut> x( m_counters.size());
c@225 264
c@225 265 for (unsigned int i = 0;i<m_counters.size(); i++)
c@225 266 x[i]= m_min + i*m_step;
c@225 267
c@225 268 return x;
c@225 269 }
c@225 270
c@225 271 template <class T, class TOut>
c@225 272 std::vector<TOut> THistogram<T,TOut>::getCenterContainers() const
c@225 273 {
c@225 274 std::vector<TOut> x( m_counters.size());
c@225 275
c@225 276 for (unsigned int i = 0;i<m_counters.size(); i++)
c@225 277 x[i]= m_min + (i+0.5)*m_step;
c@225 278
c@225 279 return x;
c@225 280 }
c@225 281
c@225 282 template <class T, class TOut>
c@225 283 unsigned int THistogram<T,TOut>::getSum() const
c@225 284 {
c@225 285 unsigned int sum = 0;
c@225 286 for (unsigned int i = 0;i<m_counters.size(); i++)
c@225 287 sum+=m_counters[i];
c@225 288
c@225 289 return sum;
c@225 290 }
c@225 291
c@225 292 template <class T, class TOut>
c@225 293 TOut THistogram<T,TOut>::getArea() const
c@225 294 {
c@225 295 const size_t n=m_counters.size();
c@225 296 TOut area=0;
c@225 297
c@225 298 if (n>6)
c@225 299 {
c@225 300 area=3.0/8.0*(m_counters[0]+m_counters[n-1])
c@225 301 +7.0/6.0*(m_counters[1]+m_counters[n-2])
c@225 302 +23.0/24.0*(m_counters[2]+m_counters[n-3]);
c@225 303 for (unsigned int i=3;i<n-3;i++)
c@225 304 {
c@225 305 area+=m_counters[i];
c@225 306 }
c@225 307 }
c@225 308 else if (n>4)
c@225 309 {
c@225 310 area=5.0/12.0*(m_counters[0]+m_counters[n-1])
c@225 311 +13.0/12.0*(m_counters[1]+m_counters[n-2]);
c@225 312 for (unsigned int i=2;i<n-2;i++)
c@225 313 {
c@225 314 area+=m_counters[i];
c@225 315 }
c@225 316 }
c@225 317 else if (n>1)
c@225 318 {
c@225 319 area=1/2.0*(m_counters[0]+m_counters[n-1]);
c@225 320 for (unsigned int i=1;i<n-1;i++)
c@225 321 {
c@225 322 area+=m_counters[i];
c@225 323 }
c@225 324 }
c@225 325 else
c@225 326 area=0;
c@225 327
c@225 328 return area*m_step;
c@225 329 }
c@225 330
c@225 331 template <class T, class TOut>
c@225 332 std::vector<TOut> THistogram<T,TOut>::getNormalizedHistogram() const
c@225 333 {
c@225 334 std::vector<TOut> normCounters( m_counters.size());
c@225 335 TOut area = (TOut)getArea();
c@225 336
c@225 337 for (unsigned int i = 0;i<m_counters.size(); i++)
c@225 338 {
c@225 339 normCounters[i]= (TOut)m_counters[i]/area;
c@225 340 }
c@225 341
c@225 342 return normCounters;
c@225 343 };
c@225 344
c@225 345 template <class T, class TOut>
c@225 346 void THistogram<T,TOut>::getMoments(const std::vector<T>& data, TOut& ave, TOut& adev, TOut& sdev, TOut& var, TOut& skew, TOut& kurt)
c@225 347 {
c@225 348 int j;
c@225 349 double ep=0.0,s,p;
c@225 350 const size_t n = data.size();
c@225 351
c@225 352 if (n <= 1)
c@225 353 // nrerror("n must be at least 2 in moment");
c@225 354 return;
c@225 355
c@225 356 s=0.0; // First pass to get the mean.
c@225 357 for (j=0;j<n;j++)
c@225 358 s += data[j];
c@225 359
c@225 360 ave=s/(n);
c@225 361 adev=var=skew=kurt=0.0;
c@225 362 /* Second pass to get the first (absolute), second,
c@225 363 third, and fourth moments of the
c@225 364 deviation from the mean. */
c@225 365
c@225 366 for (j=0;j<n;j++)
c@225 367 {
c@225 368 adev += fabs(s=data[j]-(ave));
c@225 369 ep += s;
c@225 370 var += (p=s*s);
c@225 371 skew += (p *= s);
c@225 372 kurt += (p *= s);
c@225 373 }
c@225 374
c@225 375
c@225 376 adev /= n;
c@225 377 var=(var-ep*ep/n)/(n-1); // Corrected two-pass formula.
c@225 378 sdev=sqrt(var); // Put the pieces together according to the conventional definitions.
c@225 379 if (var)
c@225 380 {
c@225 381 skew /= (n*(var)*(sdev));
c@225 382 kurt=(kurt)/(n*(var)*(var))-3.0;
c@225 383 }
c@225 384 else
c@225 385 //nrerror("No skew/kurtosis when variance = 0 (in moment)");
c@225 386 return;
c@225 387 }
c@225 388
c@225 389 #endif
c@225 390