c@249
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@249
|
2
|
c@243
|
3 /*
|
c@249
|
4 * ClusterMeltSegmenter.cpp
|
c@243
|
5 *
|
c@249
|
6 * Created by Mark Levy on 23/03/2006.
|
c@249
|
7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
|
c@249
|
8 * All rights reserved.
|
c@243
|
9 */
|
c@243
|
10
|
c@243
|
11 #include <cfloat>
|
c@243
|
12 #include <cmath>
|
c@243
|
13
|
c@243
|
14 #include "ClusterMeltSegmenter.h"
|
c@243
|
15 #include "cluster_segmenter.h"
|
c@243
|
16 #include "segment.h"
|
c@243
|
17
|
c@245
|
18 #include "dsp/transforms/FFT.h"
|
c@249
|
19 #include "dsp/chromagram/ConstantQ.h"
|
c@249
|
20 #include "dsp/rateconversion/Decimator.h"
|
c@251
|
21 #include "dsp/mfcc/MFCC.h"
|
c@245
|
22
|
c@249
|
23 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
|
c@249
|
24 window(NULL),
|
c@249
|
25 constq(NULL),
|
c@251
|
26 mfcc(NULL),
|
c@249
|
27 featureType(params.featureType),
|
c@249
|
28 hopSize(params.hopSize),
|
c@249
|
29 windowSize(params.windowSize),
|
c@249
|
30 fmin(params.fmin),
|
c@249
|
31 fmax(params.fmax),
|
c@249
|
32 nbins(params.nbins),
|
c@249
|
33 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
|
c@249
|
34 nHMMStates(params.nHMMStates),
|
c@249
|
35 nclusters(params.nclusters),
|
c@249
|
36 histogramLength(params.histogramLength),
|
c@249
|
37 neighbourhoodLimit(params.neighbourhoodLimit),
|
c@251
|
38 decimator(NULL)
|
c@243
|
39 {
|
c@243
|
40 }
|
c@243
|
41
|
c@243
|
42 void ClusterMeltSegmenter::initialise(int fs)
|
c@243
|
43 {
|
c@249
|
44 samplerate = fs;
|
c@249
|
45
|
c@251
|
46 if (featureType == FEATURE_TYPE_CONSTQ ||
|
c@251
|
47 featureType == FEATURE_TYPE_CHROMA) {
|
c@251
|
48
|
c@251
|
49 // run internal processing at 11025 or thereabouts
|
c@249
|
50 int internalRate = 11025;
|
c@249
|
51 int decimationFactor = samplerate / internalRate;
|
c@249
|
52 if (decimationFactor < 1) decimationFactor = 1;
|
c@249
|
53
|
c@249
|
54 // must be a power of two
|
c@249
|
55 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
|
c@249
|
56
|
c@249
|
57 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
|
c@249
|
58 decimationFactor = Decimator::getHighestSupportedFactor();
|
c@249
|
59 }
|
c@249
|
60
|
c@249
|
61 if (decimationFactor > 1) {
|
c@249
|
62 decimator = new Decimator(getWindowsize(), decimationFactor);
|
c@249
|
63 }
|
c@249
|
64
|
c@249
|
65 CQConfig config;
|
c@249
|
66 config.FS = samplerate / decimationFactor;
|
c@249
|
67 config.min = fmin;
|
c@249
|
68 config.max = fmax;
|
c@249
|
69 config.BPO = nbins;
|
c@249
|
70 config.CQThresh = 0.0054;
|
c@249
|
71
|
c@249
|
72 constq = new ConstantQ(config);
|
c@249
|
73 constq->sparsekernel();
|
c@251
|
74
|
c@251
|
75 ncoeff = constq->getK();
|
c@251
|
76
|
c@251
|
77 } else if (featureType == FEATURE_TYPE_MFCC) {
|
c@249
|
78
|
c@252
|
79 // run internal processing at 22050 or thereabouts
|
c@252
|
80 int internalRate = 22050;
|
c@252
|
81 int decimationFactor = samplerate / internalRate;
|
c@252
|
82 if (decimationFactor < 1) decimationFactor = 1;
|
c@252
|
83
|
c@252
|
84 // must be a power of two
|
c@252
|
85 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
|
c@252
|
86
|
c@252
|
87 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
|
c@252
|
88 decimationFactor = Decimator::getHighestSupportedFactor();
|
c@252
|
89 }
|
c@252
|
90
|
c@252
|
91 if (decimationFactor > 1) {
|
c@252
|
92 decimator = new Decimator(getWindowsize(), decimationFactor);
|
c@252
|
93 }
|
c@252
|
94
|
c@255
|
95 MFCCConfig config(samplerate / decimationFactor);
|
c@252
|
96 config.fftsize = 2048;
|
c@252
|
97 config.nceps = 19;
|
c@252
|
98 config.want_c0 = true;
|
c@251
|
99
|
c@251
|
100 mfcc = new MFCC(config);
|
c@252
|
101 ncoeff = config.nceps + 1;
|
c@249
|
102 }
|
c@243
|
103 }
|
c@243
|
104
|
c@243
|
105 ClusterMeltSegmenter::~ClusterMeltSegmenter()
|
c@243
|
106 {
|
c@249
|
107 delete window;
|
c@249
|
108 delete constq;
|
c@249
|
109 delete decimator;
|
c@245
|
110 }
|
c@245
|
111
|
c@245
|
112 int
|
c@245
|
113 ClusterMeltSegmenter::getWindowsize()
|
c@245
|
114 {
|
c@269
|
115 return static_cast<int>(windowSize * samplerate + 0.001);
|
c@245
|
116 }
|
c@245
|
117
|
c@245
|
118 int
|
c@245
|
119 ClusterMeltSegmenter::getHopsize()
|
c@245
|
120 {
|
c@269
|
121 return static_cast<int>(hopSize * samplerate + 0.001);
|
c@243
|
122 }
|
c@243
|
123
|
c@249
|
124 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
|
c@243
|
125 {
|
c@251
|
126 if (featureType == FEATURE_TYPE_CONSTQ ||
|
c@251
|
127 featureType == FEATURE_TYPE_CHROMA) {
|
c@251
|
128 extractFeaturesConstQ(samples, nsamples);
|
c@251
|
129 } else if (featureType == FEATURE_TYPE_MFCC) {
|
c@251
|
130 extractFeaturesMFCC(samples, nsamples);
|
c@251
|
131 }
|
c@251
|
132 }
|
c@251
|
133
|
c@251
|
134 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
|
c@251
|
135 {
|
c@249
|
136 if (!constq) {
|
c@251
|
137 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
|
c@251
|
138 << "No const-q: initialise not called?"
|
c@249
|
139 << std::endl;
|
c@249
|
140 return;
|
c@249
|
141 }
|
c@245
|
142
|
c@249
|
143 if (nsamples < getWindowsize()) {
|
c@249
|
144 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
|
c@249
|
145 return;
|
c@249
|
146 }
|
c@249
|
147
|
c@249
|
148 int fftsize = constq->getfftlength();
|
c@249
|
149
|
c@249
|
150 if (!window || window->getSize() != fftsize) {
|
c@249
|
151 delete window;
|
c@249
|
152 window = new Window<double>(HammingWindow, fftsize);
|
c@249
|
153 }
|
c@249
|
154
|
c@249
|
155 vector<double> cq(ncoeff);
|
c@249
|
156
|
c@249
|
157 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
|
c@249
|
158
|
c@249
|
159 const double *psource = samples;
|
c@249
|
160 int pcount = nsamples;
|
c@249
|
161
|
c@249
|
162 if (decimator) {
|
c@249
|
163 pcount = nsamples / decimator->getFactor();
|
c@249
|
164 double *decout = new double[pcount];
|
c@249
|
165 decimator->process(samples, decout);
|
c@249
|
166 psource = decout;
|
c@249
|
167 }
|
c@249
|
168
|
c@249
|
169 int origin = 0;
|
c@249
|
170
|
c@249
|
171 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
|
c@249
|
172
|
c@249
|
173 int frames = 0;
|
c@249
|
174
|
c@249
|
175 double *frame = new double[fftsize];
|
c@249
|
176 double *real = new double[fftsize];
|
c@249
|
177 double *imag = new double[fftsize];
|
c@249
|
178 double *cqre = new double[ncoeff];
|
c@249
|
179 double *cqim = new double[ncoeff];
|
c@249
|
180
|
c@249
|
181 while (origin <= pcount) {
|
c@249
|
182
|
c@249
|
183 // always need at least one fft window per block, but after
|
c@249
|
184 // that we want to avoid having any incomplete ones
|
c@249
|
185 if (origin > 0 && origin + fftsize >= pcount) break;
|
c@249
|
186
|
c@249
|
187 for (int i = 0; i < fftsize; ++i) {
|
c@249
|
188 if (origin + i < pcount) {
|
c@249
|
189 frame[i] = psource[origin + i];
|
c@249
|
190 } else {
|
c@249
|
191 frame[i] = 0.0;
|
c@249
|
192 }
|
c@249
|
193 }
|
c@249
|
194
|
c@249
|
195 for (int i = 0; i < fftsize/2; ++i) {
|
c@249
|
196 double value = frame[i];
|
c@249
|
197 frame[i] = frame[i + fftsize/2];
|
c@249
|
198 frame[i + fftsize/2] = value;
|
c@249
|
199 }
|
c@249
|
200
|
c@249
|
201 window->cut(frame);
|
c@249
|
202
|
c@249
|
203 FFT::process(fftsize, false, frame, 0, real, imag);
|
c@249
|
204
|
c@249
|
205 constq->process(real, imag, cqre, cqim);
|
c@243
|
206
|
c@249
|
207 for (int i = 0; i < ncoeff; ++i) {
|
c@249
|
208 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
|
c@249
|
209 }
|
c@249
|
210 ++frames;
|
c@245
|
211
|
c@249
|
212 origin += fftsize/2;
|
c@249
|
213 }
|
c@245
|
214
|
c@249
|
215 delete [] cqre;
|
c@249
|
216 delete [] cqim;
|
c@249
|
217 delete [] real;
|
c@249
|
218 delete [] imag;
|
c@249
|
219 delete [] frame;
|
c@245
|
220
|
c@249
|
221 for (int i = 0; i < ncoeff; ++i) {
|
c@249
|
222 cq[i] /= frames;
|
c@249
|
223 }
|
c@245
|
224
|
c@249
|
225 if (decimator) delete[] psource;
|
c@245
|
226
|
c@249
|
227 features.push_back(cq);
|
c@243
|
228 }
|
c@243
|
229
|
c@251
|
230 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
|
c@251
|
231 {
|
c@251
|
232 if (!mfcc) {
|
c@251
|
233 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
|
c@251
|
234 << "No mfcc: initialise not called?"
|
c@251
|
235 << std::endl;
|
c@251
|
236 return;
|
c@251
|
237 }
|
c@251
|
238
|
c@251
|
239 if (nsamples < getWindowsize()) {
|
c@251
|
240 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
|
c@251
|
241 return;
|
c@251
|
242 }
|
c@251
|
243
|
c@251
|
244 int fftsize = mfcc->getfftlength();
|
c@251
|
245
|
c@251
|
246 vector<double> cc(ncoeff);
|
c@251
|
247
|
c@251
|
248 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
|
c@251
|
249
|
c@251
|
250 const double *psource = samples;
|
c@251
|
251 int pcount = nsamples;
|
c@251
|
252
|
c@252
|
253 if (decimator) {
|
c@252
|
254 pcount = nsamples / decimator->getFactor();
|
c@252
|
255 double *decout = new double[pcount];
|
c@252
|
256 decimator->process(samples, decout);
|
c@252
|
257 psource = decout;
|
c@252
|
258 }
|
c@252
|
259
|
c@251
|
260 int origin = 0;
|
c@251
|
261 int frames = 0;
|
c@251
|
262
|
c@251
|
263 double *frame = new double[fftsize];
|
c@251
|
264 double *ccout = new double[ncoeff];
|
c@251
|
265
|
c@251
|
266 while (origin <= pcount) {
|
c@251
|
267
|
c@251
|
268 // always need at least one fft window per block, but after
|
c@251
|
269 // that we want to avoid having any incomplete ones
|
c@251
|
270 if (origin > 0 && origin + fftsize >= pcount) break;
|
c@251
|
271
|
c@251
|
272 for (int i = 0; i < fftsize; ++i) {
|
c@251
|
273 if (origin + i < pcount) {
|
c@251
|
274 frame[i] = psource[origin + i];
|
c@251
|
275 } else {
|
c@251
|
276 frame[i] = 0.0;
|
c@251
|
277 }
|
c@251
|
278 }
|
c@251
|
279
|
c@255
|
280 mfcc->process(frame, ccout);
|
c@251
|
281
|
c@251
|
282 for (int i = 0; i < ncoeff; ++i) {
|
c@251
|
283 cc[i] += ccout[i];
|
c@251
|
284 }
|
c@251
|
285 ++frames;
|
c@251
|
286
|
c@251
|
287 origin += fftsize/2;
|
c@251
|
288 }
|
c@251
|
289
|
c@251
|
290 delete [] ccout;
|
c@251
|
291 delete [] frame;
|
c@251
|
292
|
c@251
|
293 for (int i = 0; i < ncoeff; ++i) {
|
c@251
|
294 cc[i] /= frames;
|
c@251
|
295 }
|
c@251
|
296
|
c@252
|
297 if (decimator) delete[] psource;
|
c@252
|
298
|
c@251
|
299 features.push_back(cc);
|
c@251
|
300 }
|
c@251
|
301
|
c@243
|
302 void ClusterMeltSegmenter::segment(int m)
|
c@243
|
303 {
|
c@249
|
304 nclusters = m;
|
c@249
|
305 segment();
|
c@243
|
306 }
|
c@243
|
307
|
c@243
|
308 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
|
c@243
|
309 {
|
c@249
|
310 features = f;
|
c@249
|
311 featureType = FEATURE_TYPE_UNKNOWN;
|
c@243
|
312 }
|
c@243
|
313
|
c@243
|
314 void ClusterMeltSegmenter::segment()
|
c@243
|
315 {
|
c@251
|
316 delete constq;
|
c@251
|
317 constq = 0;
|
c@251
|
318 delete mfcc;
|
c@251
|
319 mfcc = 0;
|
c@251
|
320 delete decimator;
|
c@251
|
321 decimator = 0;
|
c@283
|
322
|
c@283
|
323 if (features.size() < histogramLength) return;
|
c@283
|
324 /*
|
c@249
|
325 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
|
c@249
|
326 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
|
c@283
|
327 */
|
c@249
|
328 // copy the features to a native array and use the existing C segmenter...
|
c@249
|
329 double** arrFeatures = new double*[features.size()];
|
c@249
|
330 for (int i = 0; i < features.size(); i++)
|
c@249
|
331 {
|
c@249
|
332 if (featureType == FEATURE_TYPE_UNKNOWN) {
|
c@249
|
333 arrFeatures[i] = new double[features[0].size()];
|
c@249
|
334 for (int j = 0; j < features[0].size(); j++)
|
c@249
|
335 arrFeatures[i][j] = features[i][j];
|
c@249
|
336 } else {
|
c@249
|
337 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
|
c@249
|
338 for (int j = 0; j < ncoeff; j++)
|
c@249
|
339 arrFeatures[i][j] = features[i][j];
|
c@249
|
340 }
|
c@249
|
341 }
|
c@243
|
342
|
c@249
|
343 q = new int[features.size()];
|
c@243
|
344
|
c@251
|
345 if (featureType == FEATURE_TYPE_UNKNOWN ||
|
c@251
|
346 featureType == FEATURE_TYPE_MFCC)
|
c@249
|
347 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
|
c@249
|
348 nclusters, neighbourhoodLimit);
|
c@249
|
349 else
|
c@249
|
350 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
|
c@249
|
351 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
|
c@243
|
352
|
c@249
|
353 // convert the cluster assignment sequence to a segmentation
|
c@249
|
354 makeSegmentation(q, features.size());
|
c@243
|
355
|
c@249
|
356 // de-allocate arrays
|
c@249
|
357 delete [] q;
|
c@249
|
358 for (int i = 0; i < features.size(); i++)
|
c@249
|
359 delete [] arrFeatures[i];
|
c@249
|
360 delete [] arrFeatures;
|
c@243
|
361
|
c@249
|
362 // clear the features
|
c@249
|
363 clear();
|
c@243
|
364 }
|
c@243
|
365
|
c@243
|
366 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
|
c@243
|
367 {
|
c@249
|
368 segmentation.segments.clear();
|
c@249
|
369 segmentation.nsegtypes = nclusters;
|
c@249
|
370 segmentation.samplerate = samplerate;
|
c@243
|
371
|
c@249
|
372 Segment segment;
|
c@249
|
373 segment.start = 0;
|
c@249
|
374 segment.type = q[0];
|
c@243
|
375
|
c@249
|
376 for (int i = 1; i < len; i++)
|
c@249
|
377 {
|
c@249
|
378 if (q[i] != q[i-1])
|
c@249
|
379 {
|
c@249
|
380 segment.end = i * getHopsize();
|
c@249
|
381 segmentation.segments.push_back(segment);
|
c@249
|
382 segment.type = q[i];
|
c@249
|
383 segment.start = segment.end;
|
c@249
|
384 }
|
c@249
|
385 }
|
c@249
|
386 segment.end = len * getHopsize();
|
c@249
|
387 segmentation.segments.push_back(segment);
|
c@243
|
388 }
|
c@243
|
389
|