cannam@24
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@24
|
2
|
cannam@18
|
3 /*
|
cannam@24
|
4 * ClusterMeltSegmenter.cpp
|
cannam@18
|
5 *
|
cannam@24
|
6 * Created by Mark Levy on 23/03/2006.
|
cannam@24
|
7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
|
Chris@84
|
8
|
Chris@84
|
9 This program is free software; you can redistribute it and/or
|
Chris@84
|
10 modify it under the terms of the GNU General Public License as
|
Chris@84
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@84
|
12 License, or (at your option) any later version. See the file
|
Chris@84
|
13 COPYING included with this distribution for more information.
|
cannam@18
|
14 */
|
cannam@18
|
15
|
cannam@18
|
16 #include <cfloat>
|
cannam@18
|
17 #include <cmath>
|
cannam@18
|
18
|
cannam@18
|
19 #include "ClusterMeltSegmenter.h"
|
cannam@18
|
20 #include "cluster_segmenter.h"
|
cannam@18
|
21 #include "segment.h"
|
cannam@18
|
22
|
cannam@20
|
23 #include "dsp/transforms/FFT.h"
|
cannam@24
|
24 #include "dsp/chromagram/ConstantQ.h"
|
cannam@24
|
25 #include "dsp/rateconversion/Decimator.h"
|
cannam@26
|
26 #include "dsp/mfcc/MFCC.h"
|
cannam@20
|
27
|
cannam@24
|
28 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
|
cannam@24
|
29 window(NULL),
|
cannam@64
|
30 fft(NULL),
|
cannam@24
|
31 constq(NULL),
|
cannam@26
|
32 mfcc(NULL),
|
cannam@24
|
33 featureType(params.featureType),
|
cannam@24
|
34 hopSize(params.hopSize),
|
cannam@24
|
35 windowSize(params.windowSize),
|
cannam@24
|
36 fmin(params.fmin),
|
cannam@24
|
37 fmax(params.fmax),
|
cannam@24
|
38 nbins(params.nbins),
|
cannam@24
|
39 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
|
cannam@24
|
40 nHMMStates(params.nHMMStates),
|
cannam@24
|
41 nclusters(params.nclusters),
|
cannam@24
|
42 histogramLength(params.histogramLength),
|
cannam@24
|
43 neighbourhoodLimit(params.neighbourhoodLimit),
|
cannam@26
|
44 decimator(NULL)
|
cannam@18
|
45 {
|
cannam@18
|
46 }
|
cannam@18
|
47
|
cannam@18
|
48 void ClusterMeltSegmenter::initialise(int fs)
|
cannam@18
|
49 {
|
cannam@24
|
50 samplerate = fs;
|
cannam@24
|
51
|
cannam@26
|
52 if (featureType == FEATURE_TYPE_CONSTQ ||
|
cannam@26
|
53 featureType == FEATURE_TYPE_CHROMA) {
|
cannam@26
|
54
|
cannam@26
|
55 // run internal processing at 11025 or thereabouts
|
cannam@24
|
56 int internalRate = 11025;
|
cannam@24
|
57 int decimationFactor = samplerate / internalRate;
|
cannam@24
|
58 if (decimationFactor < 1) decimationFactor = 1;
|
cannam@24
|
59
|
cannam@24
|
60 // must be a power of two
|
cannam@24
|
61 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
|
cannam@24
|
62
|
cannam@24
|
63 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
|
cannam@24
|
64 decimationFactor = Decimator::getHighestSupportedFactor();
|
cannam@24
|
65 }
|
cannam@24
|
66
|
cannam@24
|
67 if (decimationFactor > 1) {
|
cannam@24
|
68 decimator = new Decimator(getWindowsize(), decimationFactor);
|
cannam@24
|
69 }
|
cannam@24
|
70
|
cannam@24
|
71 CQConfig config;
|
cannam@24
|
72 config.FS = samplerate / decimationFactor;
|
cannam@24
|
73 config.min = fmin;
|
cannam@24
|
74 config.max = fmax;
|
cannam@24
|
75 config.BPO = nbins;
|
cannam@24
|
76 config.CQThresh = 0.0054;
|
cannam@24
|
77
|
cannam@24
|
78 constq = new ConstantQ(config);
|
cannam@24
|
79 constq->sparsekernel();
|
cannam@26
|
80
|
cannam@26
|
81 ncoeff = constq->getK();
|
cannam@64
|
82
|
cannam@64
|
83 fft = new FFTReal(constq->getfftlength());
|
cannam@26
|
84
|
cannam@26
|
85 } else if (featureType == FEATURE_TYPE_MFCC) {
|
cannam@24
|
86
|
cannam@27
|
87 // run internal processing at 22050 or thereabouts
|
cannam@27
|
88 int internalRate = 22050;
|
cannam@27
|
89 int decimationFactor = samplerate / internalRate;
|
cannam@27
|
90 if (decimationFactor < 1) decimationFactor = 1;
|
cannam@27
|
91
|
cannam@27
|
92 // must be a power of two
|
cannam@27
|
93 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
|
cannam@27
|
94
|
cannam@27
|
95 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
|
cannam@27
|
96 decimationFactor = Decimator::getHighestSupportedFactor();
|
cannam@27
|
97 }
|
cannam@27
|
98
|
cannam@27
|
99 if (decimationFactor > 1) {
|
cannam@27
|
100 decimator = new Decimator(getWindowsize(), decimationFactor);
|
cannam@27
|
101 }
|
cannam@27
|
102
|
cannam@30
|
103 MFCCConfig config(samplerate / decimationFactor);
|
cannam@27
|
104 config.fftsize = 2048;
|
cannam@27
|
105 config.nceps = 19;
|
cannam@27
|
106 config.want_c0 = true;
|
cannam@26
|
107
|
cannam@26
|
108 mfcc = new MFCC(config);
|
cannam@27
|
109 ncoeff = config.nceps + 1;
|
cannam@24
|
110 }
|
cannam@18
|
111 }
|
cannam@18
|
112
|
cannam@18
|
113 ClusterMeltSegmenter::~ClusterMeltSegmenter()
|
cannam@18
|
114 {
|
cannam@24
|
115 delete window;
|
cannam@24
|
116 delete constq;
|
cannam@24
|
117 delete decimator;
|
cannam@64
|
118 delete fft;
|
cannam@20
|
119 }
|
cannam@20
|
120
|
cannam@20
|
121 int
|
cannam@20
|
122 ClusterMeltSegmenter::getWindowsize()
|
cannam@20
|
123 {
|
cannam@44
|
124 return static_cast<int>(windowSize * samplerate + 0.001);
|
cannam@20
|
125 }
|
cannam@20
|
126
|
cannam@20
|
127 int
|
cannam@20
|
128 ClusterMeltSegmenter::getHopsize()
|
cannam@20
|
129 {
|
cannam@44
|
130 return static_cast<int>(hopSize * samplerate + 0.001);
|
cannam@18
|
131 }
|
cannam@18
|
132
|
cannam@24
|
133 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
|
cannam@18
|
134 {
|
cannam@26
|
135 if (featureType == FEATURE_TYPE_CONSTQ ||
|
cannam@26
|
136 featureType == FEATURE_TYPE_CHROMA) {
|
cannam@26
|
137 extractFeaturesConstQ(samples, nsamples);
|
cannam@26
|
138 } else if (featureType == FEATURE_TYPE_MFCC) {
|
cannam@26
|
139 extractFeaturesMFCC(samples, nsamples);
|
cannam@26
|
140 }
|
cannam@26
|
141 }
|
cannam@26
|
142
|
cannam@26
|
143 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
|
cannam@26
|
144 {
|
cannam@24
|
145 if (!constq) {
|
cannam@26
|
146 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
|
cannam@26
|
147 << "No const-q: initialise not called?"
|
cannam@24
|
148 << std::endl;
|
cannam@24
|
149 return;
|
cannam@24
|
150 }
|
cannam@20
|
151
|
cannam@24
|
152 if (nsamples < getWindowsize()) {
|
cannam@24
|
153 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
|
cannam@24
|
154 return;
|
cannam@24
|
155 }
|
cannam@24
|
156
|
cannam@24
|
157 int fftsize = constq->getfftlength();
|
cannam@24
|
158
|
cannam@24
|
159 if (!window || window->getSize() != fftsize) {
|
cannam@24
|
160 delete window;
|
cannam@24
|
161 window = new Window<double>(HammingWindow, fftsize);
|
cannam@24
|
162 }
|
cannam@24
|
163
|
cannam@24
|
164 vector<double> cq(ncoeff);
|
cannam@24
|
165
|
cannam@24
|
166 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
|
cannam@24
|
167
|
cannam@24
|
168 const double *psource = samples;
|
cannam@24
|
169 int pcount = nsamples;
|
cannam@24
|
170
|
cannam@24
|
171 if (decimator) {
|
cannam@24
|
172 pcount = nsamples / decimator->getFactor();
|
cannam@24
|
173 double *decout = new double[pcount];
|
cannam@24
|
174 decimator->process(samples, decout);
|
cannam@24
|
175 psource = decout;
|
cannam@24
|
176 }
|
cannam@24
|
177
|
cannam@24
|
178 int origin = 0;
|
cannam@24
|
179
|
cannam@24
|
180 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
|
cannam@24
|
181
|
cannam@24
|
182 int frames = 0;
|
cannam@24
|
183
|
cannam@24
|
184 double *frame = new double[fftsize];
|
cannam@24
|
185 double *real = new double[fftsize];
|
cannam@24
|
186 double *imag = new double[fftsize];
|
cannam@24
|
187 double *cqre = new double[ncoeff];
|
cannam@24
|
188 double *cqim = new double[ncoeff];
|
cannam@24
|
189
|
cannam@24
|
190 while (origin <= pcount) {
|
cannam@24
|
191
|
cannam@24
|
192 // always need at least one fft window per block, but after
|
cannam@24
|
193 // that we want to avoid having any incomplete ones
|
cannam@24
|
194 if (origin > 0 && origin + fftsize >= pcount) break;
|
cannam@24
|
195
|
cannam@24
|
196 for (int i = 0; i < fftsize; ++i) {
|
cannam@24
|
197 if (origin + i < pcount) {
|
cannam@24
|
198 frame[i] = psource[origin + i];
|
cannam@24
|
199 } else {
|
cannam@24
|
200 frame[i] = 0.0;
|
cannam@24
|
201 }
|
cannam@24
|
202 }
|
cannam@24
|
203
|
cannam@24
|
204 for (int i = 0; i < fftsize/2; ++i) {
|
cannam@24
|
205 double value = frame[i];
|
cannam@24
|
206 frame[i] = frame[i + fftsize/2];
|
cannam@24
|
207 frame[i + fftsize/2] = value;
|
cannam@24
|
208 }
|
cannam@24
|
209
|
cannam@24
|
210 window->cut(frame);
|
cannam@24
|
211
|
Chris@114
|
212 fft->forward(frame, real, imag);
|
cannam@24
|
213
|
cannam@24
|
214 constq->process(real, imag, cqre, cqim);
|
cannam@18
|
215
|
cannam@24
|
216 for (int i = 0; i < ncoeff; ++i) {
|
cannam@24
|
217 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
|
cannam@24
|
218 }
|
cannam@24
|
219 ++frames;
|
cannam@20
|
220
|
cannam@24
|
221 origin += fftsize/2;
|
cannam@24
|
222 }
|
cannam@20
|
223
|
cannam@24
|
224 delete [] cqre;
|
cannam@24
|
225 delete [] cqim;
|
cannam@24
|
226 delete [] real;
|
cannam@24
|
227 delete [] imag;
|
cannam@24
|
228 delete [] frame;
|
cannam@20
|
229
|
cannam@24
|
230 for (int i = 0; i < ncoeff; ++i) {
|
cannam@24
|
231 cq[i] /= frames;
|
cannam@24
|
232 }
|
cannam@20
|
233
|
cannam@24
|
234 if (decimator) delete[] psource;
|
cannam@20
|
235
|
cannam@24
|
236 features.push_back(cq);
|
cannam@18
|
237 }
|
cannam@18
|
238
|
cannam@26
|
239 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
|
cannam@26
|
240 {
|
cannam@26
|
241 if (!mfcc) {
|
cannam@26
|
242 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
|
cannam@26
|
243 << "No mfcc: initialise not called?"
|
cannam@26
|
244 << std::endl;
|
cannam@26
|
245 return;
|
cannam@26
|
246 }
|
cannam@26
|
247
|
cannam@26
|
248 if (nsamples < getWindowsize()) {
|
cannam@26
|
249 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
|
cannam@26
|
250 return;
|
cannam@26
|
251 }
|
cannam@26
|
252
|
cannam@26
|
253 int fftsize = mfcc->getfftlength();
|
cannam@26
|
254
|
cannam@26
|
255 vector<double> cc(ncoeff);
|
cannam@26
|
256
|
cannam@26
|
257 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
|
cannam@26
|
258
|
cannam@26
|
259 const double *psource = samples;
|
cannam@26
|
260 int pcount = nsamples;
|
cannam@26
|
261
|
cannam@27
|
262 if (decimator) {
|
cannam@27
|
263 pcount = nsamples / decimator->getFactor();
|
cannam@27
|
264 double *decout = new double[pcount];
|
cannam@27
|
265 decimator->process(samples, decout);
|
cannam@27
|
266 psource = decout;
|
cannam@27
|
267 }
|
cannam@27
|
268
|
cannam@26
|
269 int origin = 0;
|
cannam@26
|
270 int frames = 0;
|
cannam@26
|
271
|
cannam@26
|
272 double *frame = new double[fftsize];
|
cannam@26
|
273 double *ccout = new double[ncoeff];
|
cannam@26
|
274
|
cannam@26
|
275 while (origin <= pcount) {
|
cannam@26
|
276
|
cannam@26
|
277 // always need at least one fft window per block, but after
|
cannam@26
|
278 // that we want to avoid having any incomplete ones
|
cannam@26
|
279 if (origin > 0 && origin + fftsize >= pcount) break;
|
cannam@26
|
280
|
cannam@26
|
281 for (int i = 0; i < fftsize; ++i) {
|
cannam@26
|
282 if (origin + i < pcount) {
|
cannam@26
|
283 frame[i] = psource[origin + i];
|
cannam@26
|
284 } else {
|
cannam@26
|
285 frame[i] = 0.0;
|
cannam@26
|
286 }
|
cannam@26
|
287 }
|
cannam@26
|
288
|
cannam@30
|
289 mfcc->process(frame, ccout);
|
cannam@26
|
290
|
cannam@26
|
291 for (int i = 0; i < ncoeff; ++i) {
|
cannam@26
|
292 cc[i] += ccout[i];
|
cannam@26
|
293 }
|
cannam@26
|
294 ++frames;
|
cannam@26
|
295
|
cannam@26
|
296 origin += fftsize/2;
|
cannam@26
|
297 }
|
cannam@26
|
298
|
cannam@26
|
299 delete [] ccout;
|
cannam@26
|
300 delete [] frame;
|
cannam@26
|
301
|
cannam@26
|
302 for (int i = 0; i < ncoeff; ++i) {
|
cannam@26
|
303 cc[i] /= frames;
|
cannam@26
|
304 }
|
cannam@26
|
305
|
cannam@27
|
306 if (decimator) delete[] psource;
|
cannam@27
|
307
|
cannam@26
|
308 features.push_back(cc);
|
cannam@26
|
309 }
|
cannam@26
|
310
|
cannam@18
|
311 void ClusterMeltSegmenter::segment(int m)
|
cannam@18
|
312 {
|
cannam@24
|
313 nclusters = m;
|
cannam@24
|
314 segment();
|
cannam@18
|
315 }
|
cannam@18
|
316
|
cannam@18
|
317 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
|
cannam@18
|
318 {
|
cannam@24
|
319 features = f;
|
cannam@24
|
320 featureType = FEATURE_TYPE_UNKNOWN;
|
cannam@18
|
321 }
|
cannam@18
|
322
|
cannam@18
|
323 void ClusterMeltSegmenter::segment()
|
cannam@18
|
324 {
|
cannam@26
|
325 delete constq;
|
cannam@26
|
326 constq = 0;
|
cannam@26
|
327 delete mfcc;
|
cannam@26
|
328 mfcc = 0;
|
cannam@26
|
329 delete decimator;
|
cannam@26
|
330 decimator = 0;
|
cannam@58
|
331
|
cannam@58
|
332 if (features.size() < histogramLength) return;
|
cannam@58
|
333 /*
|
cannam@24
|
334 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
|
cannam@24
|
335 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
|
cannam@58
|
336 */
|
cannam@24
|
337 // copy the features to a native array and use the existing C segmenter...
|
cannam@24
|
338 double** arrFeatures = new double*[features.size()];
|
cannam@24
|
339 for (int i = 0; i < features.size(); i++)
|
cannam@24
|
340 {
|
cannam@24
|
341 if (featureType == FEATURE_TYPE_UNKNOWN) {
|
cannam@24
|
342 arrFeatures[i] = new double[features[0].size()];
|
cannam@24
|
343 for (int j = 0; j < features[0].size(); j++)
|
cannam@24
|
344 arrFeatures[i][j] = features[i][j];
|
cannam@24
|
345 } else {
|
cannam@24
|
346 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
|
cannam@24
|
347 for (int j = 0; j < ncoeff; j++)
|
cannam@24
|
348 arrFeatures[i][j] = features[i][j];
|
cannam@24
|
349 }
|
cannam@24
|
350 }
|
cannam@18
|
351
|
cannam@24
|
352 q = new int[features.size()];
|
cannam@18
|
353
|
cannam@26
|
354 if (featureType == FEATURE_TYPE_UNKNOWN ||
|
cannam@26
|
355 featureType == FEATURE_TYPE_MFCC)
|
cannam@24
|
356 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
|
cannam@24
|
357 nclusters, neighbourhoodLimit);
|
cannam@24
|
358 else
|
cannam@24
|
359 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
|
cannam@24
|
360 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
|
cannam@18
|
361
|
cannam@24
|
362 // convert the cluster assignment sequence to a segmentation
|
cannam@24
|
363 makeSegmentation(q, features.size());
|
cannam@18
|
364
|
cannam@24
|
365 // de-allocate arrays
|
cannam@24
|
366 delete [] q;
|
cannam@24
|
367 for (int i = 0; i < features.size(); i++)
|
cannam@24
|
368 delete [] arrFeatures[i];
|
cannam@24
|
369 delete [] arrFeatures;
|
cannam@18
|
370
|
cannam@24
|
371 // clear the features
|
cannam@24
|
372 clear();
|
cannam@18
|
373 }
|
cannam@18
|
374
|
cannam@18
|
375 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
|
cannam@18
|
376 {
|
cannam@24
|
377 segmentation.segments.clear();
|
cannam@24
|
378 segmentation.nsegtypes = nclusters;
|
cannam@24
|
379 segmentation.samplerate = samplerate;
|
cannam@18
|
380
|
cannam@24
|
381 Segment segment;
|
cannam@24
|
382 segment.start = 0;
|
cannam@24
|
383 segment.type = q[0];
|
cannam@18
|
384
|
cannam@24
|
385 for (int i = 1; i < len; i++)
|
cannam@24
|
386 {
|
cannam@24
|
387 if (q[i] != q[i-1])
|
cannam@24
|
388 {
|
cannam@24
|
389 segment.end = i * getHopsize();
|
cannam@24
|
390 segmentation.segments.push_back(segment);
|
cannam@24
|
391 segment.type = q[i];
|
cannam@24
|
392 segment.start = segment.end;
|
cannam@24
|
393 }
|
cannam@24
|
394 }
|
cannam@24
|
395 segment.end = len * getHopsize();
|
cannam@24
|
396 segmentation.segments.push_back(segment);
|
cannam@18
|
397 }
|
cannam@18
|
398
|