annotate maths/Histogram.h @ 241:a98dd8ec96f8

* Move dsp/maths to maths ; bring PCA and HMM across from Soundbite
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 09 Jan 2008 10:31:29 +0000
parents
children
rev   line source
c@241 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@241 2
c@241 3 // Histogram.h: interface for the THistogram class.
c@241 4 //
c@241 5 //////////////////////////////////////////////////////////////////////
c@241 6
c@241 7
c@241 8 #ifndef HISTOGRAM_H
c@241 9 #define HISTOGRAM_H
c@241 10
c@241 11
c@241 12 #include <valarray>
c@241 13
c@241 14 /*! \brief A histogram class
c@241 15
c@241 16 This class computes the histogram of a vector.
c@241 17
c@241 18 \par Template parameters
c@241 19
c@241 20 - T type of input data (can be any: float, double, int, UINT, etc...)
c@241 21 - TOut type of output data: float or double. (default is double)
c@241 22
c@241 23 \par Moments:
c@241 24
c@241 25 The moments (average, standard deviation, skewness, etc.) are computed using
c@241 26 the algorithm of the Numerical recipies (see Numerical recipies in C, Chapter 14.1, pg 613).
c@241 27
c@241 28 \par Example:
c@241 29
c@241 30 This example shows the typical use of the class:
c@241 31 \code
c@241 32 // a vector containing the data
c@241 33 vector<float> data;
c@241 34 // Creating histogram using float data and with 101 containers,
c@241 35 THistogram<float> histo(101);
c@241 36 // computing the histogram
c@241 37 histo.compute(data);
c@241 38 \endcode
c@241 39
c@241 40 Once this is done, you can get a vector with the histogram or the normalized histogram (such that it's area is 1):
c@241 41 \code
c@241 42 // getting normalized histogram
c@241 43 vector<float> v=histo.getNormalizedHistogram();
c@241 44 \endcode
c@241 45
c@241 46 \par Reference
c@241 47
c@241 48 Equally spaced acsissa function integration (used in #GetArea): Numerical Recipies in C, Chapter 4.1, pg 130.
c@241 49
c@241 50 \author Jonathan de Halleux, dehalleux@auto.ucl.ac.be, 2002
c@241 51 */
c@241 52
c@241 53 template<class T, class TOut = double>
c@241 54 class THistogram
c@241 55 {
c@241 56 public:
c@241 57 //! \name Constructors
c@241 58 //@{
c@241 59 /*! Default constructor
c@241 60 \param counters the number of histogram containers (default value is 10)
c@241 61 */
c@241 62 THistogram(unsigned int counters = 10);
c@241 63 virtual ~THistogram() { clear();};
c@241 64 //@}
c@241 65
c@241 66 //! \name Histogram computation, update
c@241 67 //@{
c@241 68 /*! Computes histogram of vector v
c@241 69 \param v vector to compute histogram
c@241 70 \param computeMinMax set to true if min/max of v have to be used to get the histogram limits
c@241 71
c@241 72 This function computes the histogram of v and stores it internally.
c@241 73 \sa Update, GeTHistogram
c@241 74 */
c@241 75 void compute( const std::vector<T>& v, bool computeMinMax = true);
c@241 76 //! Update histogram with the vector v
c@241 77 void update( const std::vector<T>& v);
c@241 78 //! Update histogram with t
c@241 79 void update( const T& t);
c@241 80 //@}
c@241 81
c@241 82 //! \name Resetting functions
c@241 83 //@{
c@241 84 //! Resize the histogram. Warning this function clear the histogram.
c@241 85 void resize( unsigned int counters );
c@241 86 //! Clears the histogram
c@241 87 void clear() { m_counters.clear();};
c@241 88 //@}
c@241 89
c@241 90 //! \name Setters
c@241 91 //@{
c@241 92 /*! This function sets the minimum of the histogram spectrum.
c@241 93 The spectrum is not recomputed, use it with care
c@241 94 */
c@241 95 void setMinSpectrum( const T& min ) { m_min = min; computeStep();};
c@241 96 /*! This function sets the minimum of the histogram spectrum.
c@241 97 The spectrum is not recomputed, use it with care
c@241 98 */
c@241 99 void setMaxSpectrum( const T& max ) { m_max = max; computeStep();};
c@241 100 //@}
c@241 101 //! \name Getters
c@241 102 //@{
c@241 103 //! return minimum of histogram spectrum
c@241 104 const T& getMinSpectrum() const { return m_min;};
c@241 105 //! return maximum of histogram spectrum
c@241 106 const T& getMaxSpectrum() const { return m_max;};
c@241 107 //! return step size of histogram containers
c@241 108 TOut getStep() const { return m_step;};
c@241 109 //! return number of points in histogram
c@241 110 unsigned int getSum() const;
c@241 111 /*! \brief returns area under the histogram
c@241 112
c@241 113 The Simpson rule is used to integrate the histogram.
c@241 114 */
c@241 115 TOut getArea() const;
c@241 116
c@241 117 /*! \brief Computes the moments of the histogram
c@241 118
c@241 119 \param data dataset
c@241 120 \param ave mean
c@241 121 \f[ \bar x = \frac{1}{N} \sum_{j=1}^N x_j\f]
c@241 122 \param adev mean absolute deviation
c@241 123 \f[ adev(X) = \frac{1}{N} \sum_{j=1}^N | x_j - \bar x |\f]
c@241 124 \param var average deviation:
c@241 125 \f[ \mbox{Var}(X) = \frac{1}{N-1} \sum_{j=1}^N (x_j - \bar x)^2\f]
c@241 126 \param sdev standard deviation:
c@241 127 \f[ \sigma(X) = \sqrt{var(\bar x) }\f]
c@241 128 \param skew skewness
c@241 129 \f[ \mbox{Skew}(X) = \frac{1}{N}\sum_{j=1}^N \left[ \frac{x_j - \bar x}{\sigma}\right]^3\f]
c@241 130 \param kurt kurtosis
c@241 131 \f[ \mbox{Kurt}(X) = \left\{ \frac{1}{N}\sum_{j=1}^N \left[ \frac{x_j - \bar x}{\sigma}\right]^4 \right\} - 3\f]
c@241 132
c@241 133 */
c@241 134 static void getMoments(const std::vector<T>& data, TOut& ave, TOut& adev, TOut& sdev, TOut& var, TOut& skew, TOut& kurt);
c@241 135
c@241 136 //! return number of containers
c@241 137 unsigned int getSize() const { return m_counters.size();};
c@241 138 //! returns i-th counter
c@241 139 unsigned int operator [] (unsigned int i) const {
c@241 140 // ASSERT( i < m_counters.size() );
c@241 141 return m_counters[i];
c@241 142 };
c@241 143 //! return the computed histogram
c@241 144 const std::vector<unsigned int>& geTHistogram() const { return m_counters;};
c@241 145 //! return the computed histogram, in TOuts
c@241 146 std::vector<TOut> geTHistogramD() const;
c@241 147 /*! return the normalized computed histogram
c@241 148
c@241 149 \return the histogram such that the area is equal to 1
c@241 150 */
c@241 151 std::vector<TOut> getNormalizedHistogram() const;
c@241 152 //! returns left containers position
c@241 153 std::vector<TOut> getLeftContainers() const;
c@241 154 //! returns center containers position
c@241 155 std::vector<TOut> getCenterContainers() const;
c@241 156 //@}
c@241 157 protected:
c@241 158 //! Computes the step
c@241 159 void computeStep() { m_step = (TOut)(((TOut)(m_max-m_min)) / (m_counters.size()-1));};
c@241 160 //! Data accumulators
c@241 161 std::vector<unsigned int> m_counters;
c@241 162 //! minimum of dataset
c@241 163 T m_min;
c@241 164 //! maximum of dataset
c@241 165 T m_max;
c@241 166 //! width of container
c@241 167 TOut m_step;
c@241 168 };
c@241 169
c@241 170 template<class T, class TOut>
c@241 171 THistogram<T,TOut>::THistogram(unsigned int counters)
c@241 172 : m_counters(counters,0), m_min(0), m_max(0), m_step(0)
c@241 173 {
c@241 174
c@241 175 }
c@241 176
c@241 177 template<class T, class TOut>
c@241 178 void THistogram<T,TOut>::resize( unsigned int counters )
c@241 179 {
c@241 180 clear();
c@241 181
c@241 182 m_counters.resize(counters,0);
c@241 183
c@241 184 computeStep();
c@241 185 }
c@241 186
c@241 187 template<class T, class TOut>
c@241 188 void THistogram<T,TOut>::compute( const std::vector<T>& v, bool computeMinMax)
c@241 189 {
c@241 190 using namespace std;
c@241 191 unsigned int i;
c@241 192 int index;
c@241 193
c@241 194 if (m_counters.empty())
c@241 195 return;
c@241 196
c@241 197 if (computeMinMax)
c@241 198 {
c@241 199 m_max = m_min = v[0];
c@241 200 for (i=1;i<v.size();i++)
c@241 201 {
c@241 202 m_max = std::max( m_max, v[i]);
c@241 203 m_min = std::min( m_min, v[i]);
c@241 204 }
c@241 205 }
c@241 206
c@241 207 computeStep();
c@241 208
c@241 209 for (i = 0;i < v.size() ; i++)
c@241 210 {
c@241 211 index=(int) floor( ((TOut)(v[i]-m_min))/m_step ) ;
c@241 212
c@241 213 if (index >= m_counters.size() || index < 0)
c@241 214 return;
c@241 215
c@241 216 m_counters[index]++;
c@241 217 }
c@241 218 }
c@241 219
c@241 220 template<class T, class TOut>
c@241 221 void THistogram<T,TOut>::update( const std::vector<T>& v)
c@241 222 {
c@241 223 if (m_counters.empty())
c@241 224 return;
c@241 225
c@241 226 computeStep();
c@241 227
c@241 228 TOut size = m_counters.size();
c@241 229
c@241 230 int index;
c@241 231 for (unsigned int i = 0;i < size ; i++)
c@241 232 {
c@241 233 index = (int)floor(((TOut)(v[i]-m_min))/m_step);
c@241 234
c@241 235 if (index >= m_counters.size() || index < 0)
c@241 236 return;
c@241 237
c@241 238 m_counters[index]++;
c@241 239 }
c@241 240 }
c@241 241
c@241 242 template<class T, class TOut>
c@241 243 void THistogram<T,TOut>::update( const T& t)
c@241 244 {
c@241 245 int index=(int) floor( ((TOut)(t-m_min))/m_step ) ;
c@241 246
c@241 247 if (index >= m_counters.size() || index < 0)
c@241 248 return;
c@241 249
c@241 250 m_counters[index]++;
c@241 251 };
c@241 252
c@241 253 template<class T, class TOut>
c@241 254 std::vector<TOut> THistogram<T,TOut>::geTHistogramD() const
c@241 255 {
c@241 256 std::vector<TOut> v(m_counters.size());
c@241 257 for (unsigned int i = 0;i<m_counters.size(); i++)
c@241 258 v[i]=(TOut)m_counters[i];
c@241 259
c@241 260 return v;
c@241 261 }
c@241 262
c@241 263 template <class T, class TOut>
c@241 264 std::vector<TOut> THistogram<T,TOut>::getLeftContainers() const
c@241 265 {
c@241 266 std::vector<TOut> x( m_counters.size());
c@241 267
c@241 268 for (unsigned int i = 0;i<m_counters.size(); i++)
c@241 269 x[i]= m_min + i*m_step;
c@241 270
c@241 271 return x;
c@241 272 }
c@241 273
c@241 274 template <class T, class TOut>
c@241 275 std::vector<TOut> THistogram<T,TOut>::getCenterContainers() const
c@241 276 {
c@241 277 std::vector<TOut> x( m_counters.size());
c@241 278
c@241 279 for (unsigned int i = 0;i<m_counters.size(); i++)
c@241 280 x[i]= m_min + (i+0.5)*m_step;
c@241 281
c@241 282 return x;
c@241 283 }
c@241 284
c@241 285 template <class T, class TOut>
c@241 286 unsigned int THistogram<T,TOut>::getSum() const
c@241 287 {
c@241 288 unsigned int sum = 0;
c@241 289 for (unsigned int i = 0;i<m_counters.size(); i++)
c@241 290 sum+=m_counters[i];
c@241 291
c@241 292 return sum;
c@241 293 }
c@241 294
c@241 295 template <class T, class TOut>
c@241 296 TOut THistogram<T,TOut>::getArea() const
c@241 297 {
c@241 298 const size_t n=m_counters.size();
c@241 299 TOut area=0;
c@241 300
c@241 301 if (n>6)
c@241 302 {
c@241 303 area=3.0/8.0*(m_counters[0]+m_counters[n-1])
c@241 304 +7.0/6.0*(m_counters[1]+m_counters[n-2])
c@241 305 +23.0/24.0*(m_counters[2]+m_counters[n-3]);
c@241 306 for (unsigned int i=3;i<n-3;i++)
c@241 307 {
c@241 308 area+=m_counters[i];
c@241 309 }
c@241 310 }
c@241 311 else if (n>4)
c@241 312 {
c@241 313 area=5.0/12.0*(m_counters[0]+m_counters[n-1])
c@241 314 +13.0/12.0*(m_counters[1]+m_counters[n-2]);
c@241 315 for (unsigned int i=2;i<n-2;i++)
c@241 316 {
c@241 317 area+=m_counters[i];
c@241 318 }
c@241 319 }
c@241 320 else if (n>1)
c@241 321 {
c@241 322 area=1/2.0*(m_counters[0]+m_counters[n-1]);
c@241 323 for (unsigned int i=1;i<n-1;i++)
c@241 324 {
c@241 325 area+=m_counters[i];
c@241 326 }
c@241 327 }
c@241 328 else
c@241 329 area=0;
c@241 330
c@241 331 return area*m_step;
c@241 332 }
c@241 333
c@241 334 template <class T, class TOut>
c@241 335 std::vector<TOut> THistogram<T,TOut>::getNormalizedHistogram() const
c@241 336 {
c@241 337 std::vector<TOut> normCounters( m_counters.size());
c@241 338 TOut area = (TOut)getArea();
c@241 339
c@241 340 for (unsigned int i = 0;i<m_counters.size(); i++)
c@241 341 {
c@241 342 normCounters[i]= (TOut)m_counters[i]/area;
c@241 343 }
c@241 344
c@241 345 return normCounters;
c@241 346 };
c@241 347
c@241 348 template <class T, class TOut>
c@241 349 void THistogram<T,TOut>::getMoments(const std::vector<T>& data, TOut& ave, TOut& adev, TOut& sdev, TOut& var, TOut& skew, TOut& kurt)
c@241 350 {
c@241 351 int j;
c@241 352 double ep=0.0,s,p;
c@241 353 const size_t n = data.size();
c@241 354
c@241 355 if (n <= 1)
c@241 356 // nrerror("n must be at least 2 in moment");
c@241 357 return;
c@241 358
c@241 359 s=0.0; // First pass to get the mean.
c@241 360 for (j=0;j<n;j++)
c@241 361 s += data[j];
c@241 362
c@241 363 ave=s/(n);
c@241 364 adev=var=skew=kurt=0.0;
c@241 365 /* Second pass to get the first (absolute), second,
c@241 366 third, and fourth moments of the
c@241 367 deviation from the mean. */
c@241 368
c@241 369 for (j=0;j<n;j++)
c@241 370 {
c@241 371 adev += fabs(s=data[j]-(ave));
c@241 372 ep += s;
c@241 373 var += (p=s*s);
c@241 374 skew += (p *= s);
c@241 375 kurt += (p *= s);
c@241 376 }
c@241 377
c@241 378
c@241 379 adev /= n;
c@241 380 var=(var-ep*ep/n)/(n-1); // Corrected two-pass formula.
c@241 381 sdev=sqrt(var); // Put the pieces together according to the conventional definitions.
c@241 382 if (var)
c@241 383 {
c@241 384 skew /= (n*(var)*(sdev));
c@241 385 kurt=(kurt)/(n*(var)*(var))-3.0;
c@241 386 }
c@241 387 else
c@241 388 //nrerror("No skew/kurtosis when variance = 0 (in moment)");
c@241 389 return;
c@241 390 }
c@241 391
c@241 392 #endif
c@241 393