ClusterMeltSegmenter.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  * ClusterMeltSegmenter.cpp
5  *
6  * Created by Mark Levy on 23/03/2006.
7  * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version. See the file
13  COPYING included with this distribution for more information.
14  */
15 
16 #include <cfloat>
17 #include <cmath>
18 
19 #include "ClusterMeltSegmenter.h"
20 #include "cluster_segmenter.h"
21 #include "segment.h"
22 
23 #include "dsp/transforms/FFT.h"
26 #include "dsp/mfcc/MFCC.h"
27 
28 using std::vector;
29 
31  window(NULL),
32  fft(NULL),
33  constq(NULL),
34  mfcc(NULL),
35  featureType(params.featureType),
36  hopSize(params.hopSize),
37  windowSize(params.windowSize),
38  fmin(params.fmin),
39  fmax(params.fmax),
40  nbins(params.nbins),
41  ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
42  nHMMStates(params.nHMMStates),
43  nclusters(params.nclusters),
44  histogramLength(params.histogramLength),
45  neighbourhoodLimit(params.neighbourhoodLimit),
46  decimator(NULL)
47 {
48 }
49 
51 {
52  samplerate = fs;
53 
56 
57  // run internal processing at 11025 or thereabouts
58  int internalRate = 11025;
59  int decimationFactor = samplerate / internalRate;
60  if (decimationFactor < 1) decimationFactor = 1;
61 
62  // must be a power of two
63  while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
64 
65  if (decimationFactor > Decimator::getHighestSupportedFactor()) {
66  decimationFactor = Decimator::getHighestSupportedFactor();
67  }
68 
69  if (decimationFactor > 1) {
70  decimator = new Decimator(getWindowsize(), decimationFactor);
71  }
72 
73  CQConfig config;
74  config.FS = samplerate / decimationFactor;
75  config.min = fmin;
76  config.max = fmax;
77  config.BPO = nbins;
78  config.CQThresh = 0.0054;
79 
80  constq = new ConstantQ(config);
82 
83  ncoeff = constq->getK();
84 
85  fft = new FFTReal(constq->getFFTLength());
86 
87  } else if (featureType == FEATURE_TYPE_MFCC) {
88 
89  // run internal processing at 22050 or thereabouts
90  int internalRate = 22050;
91  int decimationFactor = samplerate / internalRate;
92  if (decimationFactor < 1) decimationFactor = 1;
93 
94  // must be a power of two
95  while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
96 
97  if (decimationFactor > Decimator::getHighestSupportedFactor()) {
98  decimationFactor = Decimator::getHighestSupportedFactor();
99  }
100 
101  if (decimationFactor > 1) {
102  decimator = new Decimator(getWindowsize(), decimationFactor);
103  }
104 
105  MFCCConfig config(samplerate / decimationFactor);
106  config.fftsize = 2048;
107  config.nceps = 19;
108  config.want_c0 = true;
109 
110  mfcc = new MFCC(config);
111  ncoeff = config.nceps + 1;
112  }
113 }
114 
116 {
117  delete window;
118  delete constq;
119  delete decimator;
120  delete fft;
121 }
122 
123 int
125 {
126  return static_cast<int>(windowSize * samplerate + 0.001);
127 }
128 
129 int
131 {
132  return static_cast<int>(hopSize * samplerate + 0.001);
133 }
134 
135 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
136 {
139  extractFeaturesConstQ(samples, nsamples);
140  } else if (featureType == FEATURE_TYPE_MFCC) {
141  extractFeaturesMFCC(samples, nsamples);
142  }
143 }
144 
145 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
146 {
147  if (!constq) {
148  std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
149  << "No const-q: initialise not called?"
150  << std::endl;
151  return;
152  }
153 
154  if (nsamples < getWindowsize()) {
155  std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
156  return;
157  }
158 
159  int fftsize = constq->getFFTLength();
160 
161  if (!window || window->getSize() != fftsize) {
162  delete window;
163  window = new Window<double>(HammingWindow, fftsize);
164  }
165 
166  vector<double> cq(ncoeff);
167 
168  for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
169 
170  const double *psource = samples;
171  int pcount = nsamples;
172 
173  if (decimator) {
174  pcount = nsamples / decimator->getFactor();
175  double *decout = new double[pcount];
176  decimator->process(samples, decout);
177  psource = decout;
178  }
179 
180  int origin = 0;
181 
182 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
183 
184  int frames = 0;
185 
186  double *frame = new double[fftsize];
187  double *real = new double[fftsize];
188  double *imag = new double[fftsize];
189  double *cqre = new double[ncoeff];
190  double *cqim = new double[ncoeff];
191 
192  while (origin <= pcount) {
193 
194  // always need at least one fft window per block, but after
195  // that we want to avoid having any incomplete ones
196  if (origin > 0 && origin + fftsize >= pcount) break;
197 
198  for (int i = 0; i < fftsize; ++i) {
199  if (origin + i < pcount) {
200  frame[i] = psource[origin + i];
201  } else {
202  frame[i] = 0.0;
203  }
204  }
205 
206  for (int i = 0; i < fftsize/2; ++i) {
207  double value = frame[i];
208  frame[i] = frame[i + fftsize/2];
209  frame[i + fftsize/2] = value;
210  }
211 
212  window->cut(frame);
213 
214  fft->forward(frame, real, imag);
215 
216  constq->process(real, imag, cqre, cqim);
217 
218  for (int i = 0; i < ncoeff; ++i) {
219  cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
220  }
221  ++frames;
222 
223  origin += fftsize/2;
224  }
225 
226  delete [] cqre;
227  delete [] cqim;
228  delete [] real;
229  delete [] imag;
230  delete [] frame;
231 
232  for (int i = 0; i < ncoeff; ++i) {
233  cq[i] /= frames;
234  }
235 
236  if (decimator) delete[] psource;
237 
238  features.push_back(cq);
239 }
240 
241 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
242 {
243  if (!mfcc) {
244  std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
245  << "No mfcc: initialise not called?"
246  << std::endl;
247  return;
248  }
249 
250  if (nsamples < getWindowsize()) {
251  std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
252  return;
253  }
254 
255  int fftsize = mfcc->getfftlength();
256 
257  vector<double> cc(ncoeff);
258 
259  for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
260 
261  const double *psource = samples;
262  int pcount = nsamples;
263 
264  if (decimator) {
265  pcount = nsamples / decimator->getFactor();
266  double *decout = new double[pcount];
267  decimator->process(samples, decout);
268  psource = decout;
269  }
270 
271  int origin = 0;
272  int frames = 0;
273 
274  double *frame = new double[fftsize];
275  double *ccout = new double[ncoeff];
276 
277  while (origin <= pcount) {
278 
279  // always need at least one fft window per block, but after
280  // that we want to avoid having any incomplete ones
281  if (origin > 0 && origin + fftsize >= pcount) break;
282 
283  for (int i = 0; i < fftsize; ++i) {
284  if (origin + i < pcount) {
285  frame[i] = psource[origin + i];
286  } else {
287  frame[i] = 0.0;
288  }
289  }
290 
291  mfcc->process(frame, ccout);
292 
293  for (int i = 0; i < ncoeff; ++i) {
294  cc[i] += ccout[i];
295  }
296  ++frames;
297 
298  origin += fftsize/2;
299  }
300 
301  delete [] ccout;
302  delete [] frame;
303 
304  for (int i = 0; i < ncoeff; ++i) {
305  cc[i] /= frames;
306  }
307 
308  if (decimator) delete[] psource;
309 
310  features.push_back(cc);
311 }
312 
314 {
315  nclusters = m;
316  segment();
317 }
318 
319 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
320 {
321  features = f;
323 }
324 
326 {
327  delete constq;
328  constq = 0;
329  delete mfcc;
330  mfcc = 0;
331  delete decimator;
332  decimator = 0;
333 
334  int sz = features.size();
335 
336  if (sz < histogramLength) return;
337 /*
338  std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
339  << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
340 */
341  // copy the features to a native array and use the existing C segmenter...
342  double** arrFeatures = new double*[features.size()];
343  for (int i = 0; i < sz; i++) {
345  arrFeatures[i] = new double[features[0].size()];
346  for (int j = 0; j < int(features[0].size()); j++) {
347  arrFeatures[i][j] = features[i][j];
348  }
349  } else {
350  arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
351  for (int j = 0; j < ncoeff; j++) {
352  arrFeatures[i][j] = features[i][j];
353  }
354  }
355  }
356 
357  q = new int[features.size()];
358 
361  cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
363  } else {
364  constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
366  }
367 
368  // convert the cluster assignment sequence to a segmentation
369  makeSegmentation(q, features.size());
370 
371  // de-allocate arrays
372  delete [] q;
373  for (int i = 0; i < int(features.size()); i++) delete [] arrFeatures[i];
374  delete [] arrFeatures;
375 
376  // clear the features
377  clear();
378 }
379 
381 {
382  segmentation.segments.clear();
385 
387  segment.start = 0;
388  segment.type = q[0];
389 
390  for (int i = 1; i < len; i++) {
391  if (q[i] != q[i-1]) {
392  segment.end = i * getHopsize();
393  segmentation.segments.push_back(segment);
394  segment.type = q[i];
395  segment.start = segment.end;
396  }
397  }
398  segment.end = len * getHopsize();
399  segmentation.segments.push_back(segment);
400 }
401 
int fftsize
Definition: MFCC.h:25
int start
Definition: Segmenter.h:26
void process(const double *src, double *dst)
Process inLength samples (as supplied to constructor) from src and write inLength / decFactor samples...
Definition: Decimator.cpp:195
void extractFeaturesMFCC(const double *, int)
void extractFeaturesConstQ(const double *, int)
int process(const double *inframe, double *outceps)
Process time-domain input data.
Definition: MFCC.cpp:205
int getFFTLength()
Definition: ConstantQ.h:46
int getSize() const
Definition: Window.h:69
static int getHighestSupportedFactor()
Definition: Decimator.h:57
int nceps
Definition: MFCC.h:26
virtual void clear()
Definition: Segmenter.h:52
void cluster_segment(int *q, double **features, int frames_read, int feature_length, int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
int nsegtypes
Definition: Segmenter.h:34
void constq_segment(int *q, double **features, int frames_read, int bins, int ncoeff, int feature_type, int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
ClusterMeltSegmenter(ClusterMeltSegmenterParams params)
void process(const double *FFTRe, const double *FFTIm, double *CQRe, double *CQIm)
Definition: ConstantQ.cpp:195
double CQThresh
Definition: ConstantQ.h:28
std::vector< std::vector< double > > features
Definition: Segmenter.h:55
int BPO
Definition: ConstantQ.h:27
double FS
Definition: ConstantQ.h:24
int getfftlength() const
Definition: MFCC.h:56
void forward(const double *realIn, double *realOut, double *imagOut)
Carry out a forward real-to-complex transform of size nsamples, where nsamples is the value provided ...
Definition: FFT.cpp:184
void setFeatures(const std::vector< std::vector< double > > &f)
double max
Definition: ConstantQ.h:26
double min
Definition: ConstantQ.h:25
int samplerate
Definition: Segmenter.h:35
void cut(T *src) const
Definition: Window.h:61
int samplerate
Definition: Segmenter.h:57
Segmentation segmentation
Definition: Segmenter.h:56
Definition: FFT.h:52
int end
Definition: Segmenter.h:27
void sparsekernel()
Definition: ConstantQ.cpp:38
bool want_c0
Definition: MFCC.h:28
Decimator carries out a fast downsample by a power-of-two factor.
Definition: Decimator.h:24
Window< double > * window
virtual void initialise(int samplerate)
void makeSegmentation(int *q, int len)
int getK()
Definition: ConstantQ.h:45
std::vector< Segment > segments
Definition: Segmenter.h:36
int getFactor() const
Definition: Decimator.h:56
int type
Definition: Segmenter.h:28
virtual void extractFeatures(const double *samples, int nsamples)
Definition: MFCC.h:35