comparison maths/Histogram.h @ 16:2e3f5d2d62c1

* Move dsp/maths to maths ; bring PCA and HMM across from Soundbite
author cannam
date Wed, 09 Jan 2008 10:31:29 +0000
parents dsp/maths/Histogram.h@14839f9a616e
children
comparison
equal deleted inserted replaced
15:10c3f9df4a07 16:2e3f5d2d62c1
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 // Histogram.h: interface for the THistogram class.
4 //
5 //////////////////////////////////////////////////////////////////////
6
7
8 #ifndef HISTOGRAM_H
9 #define HISTOGRAM_H
10
11
12 #include <valarray>
13
14 /*! \brief A histogram class
15
16 This class computes the histogram of a vector.
17
18 \par Template parameters
19
20 - T type of input data (can be any: float, double, int, UINT, etc...)
21 - TOut type of output data: float or double. (default is double)
22
23 \par Moments:
24
25 The moments (average, standard deviation, skewness, etc.) are computed using
26 the algorithm of the Numerical recipies (see Numerical recipies in C, Chapter 14.1, pg 613).
27
28 \par Example:
29
30 This example shows the typical use of the class:
31 \code
32 // a vector containing the data
33 vector<float> data;
34 // Creating histogram using float data and with 101 containers,
35 THistogram<float> histo(101);
36 // computing the histogram
37 histo.compute(data);
38 \endcode
39
40 Once this is done, you can get a vector with the histogram or the normalized histogram (such that it's area is 1):
41 \code
42 // getting normalized histogram
43 vector<float> v=histo.getNormalizedHistogram();
44 \endcode
45
46 \par Reference
47
48 Equally spaced acsissa function integration (used in #GetArea): Numerical Recipies in C, Chapter 4.1, pg 130.
49
50 \author Jonathan de Halleux, dehalleux@auto.ucl.ac.be, 2002
51 */
52
53 template<class T, class TOut = double>
54 class THistogram
55 {
56 public:
57 //! \name Constructors
58 //@{
59 /*! Default constructor
60 \param counters the number of histogram containers (default value is 10)
61 */
62 THistogram(unsigned int counters = 10);
63 virtual ~THistogram() { clear();};
64 //@}
65
66 //! \name Histogram computation, update
67 //@{
68 /*! Computes histogram of vector v
69 \param v vector to compute histogram
70 \param computeMinMax set to true if min/max of v have to be used to get the histogram limits
71
72 This function computes the histogram of v and stores it internally.
73 \sa Update, GeTHistogram
74 */
75 void compute( const std::vector<T>& v, bool computeMinMax = true);
76 //! Update histogram with the vector v
77 void update( const std::vector<T>& v);
78 //! Update histogram with t
79 void update( const T& t);
80 //@}
81
82 //! \name Resetting functions
83 //@{
84 //! Resize the histogram. Warning this function clear the histogram.
85 void resize( unsigned int counters );
86 //! Clears the histogram
87 void clear() { m_counters.clear();};
88 //@}
89
90 //! \name Setters
91 //@{
92 /*! This function sets the minimum of the histogram spectrum.
93 The spectrum is not recomputed, use it with care
94 */
95 void setMinSpectrum( const T& min ) { m_min = min; computeStep();};
96 /*! This function sets the minimum of the histogram spectrum.
97 The spectrum is not recomputed, use it with care
98 */
99 void setMaxSpectrum( const T& max ) { m_max = max; computeStep();};
100 //@}
101 //! \name Getters
102 //@{
103 //! return minimum of histogram spectrum
104 const T& getMinSpectrum() const { return m_min;};
105 //! return maximum of histogram spectrum
106 const T& getMaxSpectrum() const { return m_max;};
107 //! return step size of histogram containers
108 TOut getStep() const { return m_step;};
109 //! return number of points in histogram
110 unsigned int getSum() const;
111 /*! \brief returns area under the histogram
112
113 The Simpson rule is used to integrate the histogram.
114 */
115 TOut getArea() const;
116
117 /*! \brief Computes the moments of the histogram
118
119 \param data dataset
120 \param ave mean
121 \f[ \bar x = \frac{1}{N} \sum_{j=1}^N x_j\f]
122 \param adev mean absolute deviation
123 \f[ adev(X) = \frac{1}{N} \sum_{j=1}^N | x_j - \bar x |\f]
124 \param var average deviation:
125 \f[ \mbox{Var}(X) = \frac{1}{N-1} \sum_{j=1}^N (x_j - \bar x)^2\f]
126 \param sdev standard deviation:
127 \f[ \sigma(X) = \sqrt{var(\bar x) }\f]
128 \param skew skewness
129 \f[ \mbox{Skew}(X) = \frac{1}{N}\sum_{j=1}^N \left[ \frac{x_j - \bar x}{\sigma}\right]^3\f]
130 \param kurt kurtosis
131 \f[ \mbox{Kurt}(X) = \left\{ \frac{1}{N}\sum_{j=1}^N \left[ \frac{x_j - \bar x}{\sigma}\right]^4 \right\} - 3\f]
132
133 */
134 static void getMoments(const std::vector<T>& data, TOut& ave, TOut& adev, TOut& sdev, TOut& var, TOut& skew, TOut& kurt);
135
136 //! return number of containers
137 unsigned int getSize() const { return m_counters.size();};
138 //! returns i-th counter
139 unsigned int operator [] (unsigned int i) const {
140 // ASSERT( i < m_counters.size() );
141 return m_counters[i];
142 };
143 //! return the computed histogram
144 const std::vector<unsigned int>& geTHistogram() const { return m_counters;};
145 //! return the computed histogram, in TOuts
146 std::vector<TOut> geTHistogramD() const;
147 /*! return the normalized computed histogram
148
149 \return the histogram such that the area is equal to 1
150 */
151 std::vector<TOut> getNormalizedHistogram() const;
152 //! returns left containers position
153 std::vector<TOut> getLeftContainers() const;
154 //! returns center containers position
155 std::vector<TOut> getCenterContainers() const;
156 //@}
157 protected:
158 //! Computes the step
159 void computeStep() { m_step = (TOut)(((TOut)(m_max-m_min)) / (m_counters.size()-1));};
160 //! Data accumulators
161 std::vector<unsigned int> m_counters;
162 //! minimum of dataset
163 T m_min;
164 //! maximum of dataset
165 T m_max;
166 //! width of container
167 TOut m_step;
168 };
169
170 template<class T, class TOut>
171 THistogram<T,TOut>::THistogram(unsigned int counters)
172 : m_counters(counters,0), m_min(0), m_max(0), m_step(0)
173 {
174
175 }
176
177 template<class T, class TOut>
178 void THistogram<T,TOut>::resize( unsigned int counters )
179 {
180 clear();
181
182 m_counters.resize(counters,0);
183
184 computeStep();
185 }
186
187 template<class T, class TOut>
188 void THistogram<T,TOut>::compute( const std::vector<T>& v, bool computeMinMax)
189 {
190 using namespace std;
191 unsigned int i;
192 int index;
193
194 if (m_counters.empty())
195 return;
196
197 if (computeMinMax)
198 {
199 m_max = m_min = v[0];
200 for (i=1;i<v.size();i++)
201 {
202 m_max = std::max( m_max, v[i]);
203 m_min = std::min( m_min, v[i]);
204 }
205 }
206
207 computeStep();
208
209 for (i = 0;i < v.size() ; i++)
210 {
211 index=(int) floor( ((TOut)(v[i]-m_min))/m_step ) ;
212
213 if (index >= m_counters.size() || index < 0)
214 return;
215
216 m_counters[index]++;
217 }
218 }
219
220 template<class T, class TOut>
221 void THistogram<T,TOut>::update( const std::vector<T>& v)
222 {
223 if (m_counters.empty())
224 return;
225
226 computeStep();
227
228 TOut size = m_counters.size();
229
230 int index;
231 for (unsigned int i = 0;i < size ; i++)
232 {
233 index = (int)floor(((TOut)(v[i]-m_min))/m_step);
234
235 if (index >= m_counters.size() || index < 0)
236 return;
237
238 m_counters[index]++;
239 }
240 }
241
242 template<class T, class TOut>
243 void THistogram<T,TOut>::update( const T& t)
244 {
245 int index=(int) floor( ((TOut)(t-m_min))/m_step ) ;
246
247 if (index >= m_counters.size() || index < 0)
248 return;
249
250 m_counters[index]++;
251 };
252
253 template<class T, class TOut>
254 std::vector<TOut> THistogram<T,TOut>::geTHistogramD() const
255 {
256 std::vector<TOut> v(m_counters.size());
257 for (unsigned int i = 0;i<m_counters.size(); i++)
258 v[i]=(TOut)m_counters[i];
259
260 return v;
261 }
262
263 template <class T, class TOut>
264 std::vector<TOut> THistogram<T,TOut>::getLeftContainers() const
265 {
266 std::vector<TOut> x( m_counters.size());
267
268 for (unsigned int i = 0;i<m_counters.size(); i++)
269 x[i]= m_min + i*m_step;
270
271 return x;
272 }
273
274 template <class T, class TOut>
275 std::vector<TOut> THistogram<T,TOut>::getCenterContainers() const
276 {
277 std::vector<TOut> x( m_counters.size());
278
279 for (unsigned int i = 0;i<m_counters.size(); i++)
280 x[i]= m_min + (i+0.5)*m_step;
281
282 return x;
283 }
284
285 template <class T, class TOut>
286 unsigned int THistogram<T,TOut>::getSum() const
287 {
288 unsigned int sum = 0;
289 for (unsigned int i = 0;i<m_counters.size(); i++)
290 sum+=m_counters[i];
291
292 return sum;
293 }
294
295 template <class T, class TOut>
296 TOut THistogram<T,TOut>::getArea() const
297 {
298 const size_t n=m_counters.size();
299 TOut area=0;
300
301 if (n>6)
302 {
303 area=3.0/8.0*(m_counters[0]+m_counters[n-1])
304 +7.0/6.0*(m_counters[1]+m_counters[n-2])
305 +23.0/24.0*(m_counters[2]+m_counters[n-3]);
306 for (unsigned int i=3;i<n-3;i++)
307 {
308 area+=m_counters[i];
309 }
310 }
311 else if (n>4)
312 {
313 area=5.0/12.0*(m_counters[0]+m_counters[n-1])
314 +13.0/12.0*(m_counters[1]+m_counters[n-2]);
315 for (unsigned int i=2;i<n-2;i++)
316 {
317 area+=m_counters[i];
318 }
319 }
320 else if (n>1)
321 {
322 area=1/2.0*(m_counters[0]+m_counters[n-1]);
323 for (unsigned int i=1;i<n-1;i++)
324 {
325 area+=m_counters[i];
326 }
327 }
328 else
329 area=0;
330
331 return area*m_step;
332 }
333
334 template <class T, class TOut>
335 std::vector<TOut> THistogram<T,TOut>::getNormalizedHistogram() const
336 {
337 std::vector<TOut> normCounters( m_counters.size());
338 TOut area = (TOut)getArea();
339
340 for (unsigned int i = 0;i<m_counters.size(); i++)
341 {
342 normCounters[i]= (TOut)m_counters[i]/area;
343 }
344
345 return normCounters;
346 };
347
348 template <class T, class TOut>
349 void THistogram<T,TOut>::getMoments(const std::vector<T>& data, TOut& ave, TOut& adev, TOut& sdev, TOut& var, TOut& skew, TOut& kurt)
350 {
351 int j;
352 double ep=0.0,s,p;
353 const size_t n = data.size();
354
355 if (n <= 1)
356 // nrerror("n must be at least 2 in moment");
357 return;
358
359 s=0.0; // First pass to get the mean.
360 for (j=0;j<n;j++)
361 s += data[j];
362
363 ave=s/(n);
364 adev=var=skew=kurt=0.0;
365 /* Second pass to get the first (absolute), second,
366 third, and fourth moments of the
367 deviation from the mean. */
368
369 for (j=0;j<n;j++)
370 {
371 adev += fabs(s=data[j]-(ave));
372 ep += s;
373 var += (p=s*s);
374 skew += (p *= s);
375 kurt += (p *= s);
376 }
377
378
379 adev /= n;
380 var=(var-ep*ep/n)/(n-1); // Corrected two-pass formula.
381 sdev=sqrt(var); // Put the pieces together according to the conventional definitions.
382 if (var)
383 {
384 skew /= (n*(var)*(sdev));
385 kurt=(kurt)/(n*(var)*(var))-3.0;
386 }
387 else
388 //nrerror("No skew/kurtosis when variance = 0 (in moment)");
389 return;
390 }
391
392 #endif
393