c@249
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@249
|
2
|
c@243
|
3 /*
|
c@249
|
4 * ClusterMeltSegmenter.cpp
|
c@243
|
5 *
|
c@249
|
6 * Created by Mark Levy on 23/03/2006.
|
c@249
|
7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
|
c@309
|
8
|
c@309
|
9 This program is free software; you can redistribute it and/or
|
c@309
|
10 modify it under the terms of the GNU General Public License as
|
c@309
|
11 published by the Free Software Foundation; either version 2 of the
|
c@309
|
12 License, or (at your option) any later version. See the file
|
c@309
|
13 COPYING included with this distribution for more information.
|
c@243
|
14 */
|
c@243
|
15
|
c@243
|
16 #include <cfloat>
|
c@243
|
17 #include <cmath>
|
c@243
|
18
|
c@243
|
19 #include "ClusterMeltSegmenter.h"
|
c@243
|
20 #include "cluster_segmenter.h"
|
c@243
|
21 #include "segment.h"
|
c@243
|
22
|
c@245
|
23 #include "dsp/transforms/FFT.h"
|
c@249
|
24 #include "dsp/chromagram/ConstantQ.h"
|
c@249
|
25 #include "dsp/rateconversion/Decimator.h"
|
c@251
|
26 #include "dsp/mfcc/MFCC.h"
|
c@245
|
27
|
cannam@493
|
28 using std::vector;
|
cannam@493
|
29
|
c@249
|
30 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
|
c@249
|
31 window(NULL),
|
c@289
|
32 fft(NULL),
|
c@249
|
33 constq(NULL),
|
c@251
|
34 mfcc(NULL),
|
c@249
|
35 featureType(params.featureType),
|
c@249
|
36 hopSize(params.hopSize),
|
c@249
|
37 windowSize(params.windowSize),
|
c@249
|
38 fmin(params.fmin),
|
c@249
|
39 fmax(params.fmax),
|
c@249
|
40 nbins(params.nbins),
|
cannam@480
|
41 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
|
c@249
|
42 nHMMStates(params.nHMMStates),
|
c@249
|
43 nclusters(params.nclusters),
|
c@249
|
44 histogramLength(params.histogramLength),
|
c@249
|
45 neighbourhoodLimit(params.neighbourhoodLimit),
|
c@251
|
46 decimator(NULL)
|
c@243
|
47 {
|
c@243
|
48 }
|
c@243
|
49
|
c@243
|
50 void ClusterMeltSegmenter::initialise(int fs)
|
c@243
|
51 {
|
c@249
|
52 samplerate = fs;
|
c@249
|
53
|
c@251
|
54 if (featureType == FEATURE_TYPE_CONSTQ ||
|
c@251
|
55 featureType == FEATURE_TYPE_CHROMA) {
|
c@251
|
56
|
c@251
|
57 // run internal processing at 11025 or thereabouts
|
c@249
|
58 int internalRate = 11025;
|
c@249
|
59 int decimationFactor = samplerate / internalRate;
|
c@249
|
60 if (decimationFactor < 1) decimationFactor = 1;
|
c@249
|
61
|
c@249
|
62 // must be a power of two
|
c@249
|
63 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
|
c@249
|
64
|
c@249
|
65 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
|
c@249
|
66 decimationFactor = Decimator::getHighestSupportedFactor();
|
c@249
|
67 }
|
c@249
|
68
|
c@249
|
69 if (decimationFactor > 1) {
|
c@249
|
70 decimator = new Decimator(getWindowsize(), decimationFactor);
|
c@249
|
71 }
|
c@249
|
72
|
c@249
|
73 CQConfig config;
|
c@249
|
74 config.FS = samplerate / decimationFactor;
|
c@249
|
75 config.min = fmin;
|
c@249
|
76 config.max = fmax;
|
c@249
|
77 config.BPO = nbins;
|
c@249
|
78 config.CQThresh = 0.0054;
|
c@249
|
79
|
c@249
|
80 constq = new ConstantQ(config);
|
c@249
|
81 constq->sparsekernel();
|
c@251
|
82
|
c@251
|
83 ncoeff = constq->getK();
|
c@289
|
84
|
cannam@495
|
85 fft = new FFTReal(constq->getFFTLength());
|
c@251
|
86
|
c@251
|
87 } else if (featureType == FEATURE_TYPE_MFCC) {
|
c@249
|
88
|
c@252
|
89 // run internal processing at 22050 or thereabouts
|
c@252
|
90 int internalRate = 22050;
|
c@252
|
91 int decimationFactor = samplerate / internalRate;
|
c@252
|
92 if (decimationFactor < 1) decimationFactor = 1;
|
c@252
|
93
|
c@252
|
94 // must be a power of two
|
c@252
|
95 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
|
c@252
|
96
|
c@252
|
97 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
|
c@252
|
98 decimationFactor = Decimator::getHighestSupportedFactor();
|
c@252
|
99 }
|
c@252
|
100
|
c@252
|
101 if (decimationFactor > 1) {
|
c@252
|
102 decimator = new Decimator(getWindowsize(), decimationFactor);
|
c@252
|
103 }
|
c@252
|
104
|
c@255
|
105 MFCCConfig config(samplerate / decimationFactor);
|
c@252
|
106 config.fftsize = 2048;
|
c@252
|
107 config.nceps = 19;
|
c@252
|
108 config.want_c0 = true;
|
c@251
|
109
|
c@251
|
110 mfcc = new MFCC(config);
|
c@252
|
111 ncoeff = config.nceps + 1;
|
c@249
|
112 }
|
c@243
|
113 }
|
c@243
|
114
|
c@243
|
115 ClusterMeltSegmenter::~ClusterMeltSegmenter()
|
c@243
|
116 {
|
c@249
|
117 delete window;
|
c@249
|
118 delete constq;
|
c@249
|
119 delete decimator;
|
c@289
|
120 delete fft;
|
c@245
|
121 }
|
c@245
|
122
|
c@245
|
123 int
|
c@245
|
124 ClusterMeltSegmenter::getWindowsize()
|
c@245
|
125 {
|
c@269
|
126 return static_cast<int>(windowSize * samplerate + 0.001);
|
c@245
|
127 }
|
c@245
|
128
|
c@245
|
129 int
|
c@245
|
130 ClusterMeltSegmenter::getHopsize()
|
c@245
|
131 {
|
c@269
|
132 return static_cast<int>(hopSize * samplerate + 0.001);
|
c@243
|
133 }
|
c@243
|
134
|
c@249
|
135 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
|
c@243
|
136 {
|
c@251
|
137 if (featureType == FEATURE_TYPE_CONSTQ ||
|
c@251
|
138 featureType == FEATURE_TYPE_CHROMA) {
|
c@251
|
139 extractFeaturesConstQ(samples, nsamples);
|
c@251
|
140 } else if (featureType == FEATURE_TYPE_MFCC) {
|
c@251
|
141 extractFeaturesMFCC(samples, nsamples);
|
c@251
|
142 }
|
c@251
|
143 }
|
c@251
|
144
|
c@251
|
145 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
|
c@251
|
146 {
|
c@249
|
147 if (!constq) {
|
c@251
|
148 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
|
c@251
|
149 << "No const-q: initialise not called?"
|
c@249
|
150 << std::endl;
|
c@249
|
151 return;
|
c@249
|
152 }
|
c@245
|
153
|
c@249
|
154 if (nsamples < getWindowsize()) {
|
c@249
|
155 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
|
c@249
|
156 return;
|
c@249
|
157 }
|
c@249
|
158
|
cannam@495
|
159 int fftsize = constq->getFFTLength();
|
c@249
|
160
|
c@249
|
161 if (!window || window->getSize() != fftsize) {
|
c@249
|
162 delete window;
|
c@249
|
163 window = new Window<double>(HammingWindow, fftsize);
|
c@249
|
164 }
|
c@249
|
165
|
c@249
|
166 vector<double> cq(ncoeff);
|
c@249
|
167
|
c@249
|
168 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
|
c@249
|
169
|
c@249
|
170 const double *psource = samples;
|
c@249
|
171 int pcount = nsamples;
|
c@249
|
172
|
c@249
|
173 if (decimator) {
|
c@249
|
174 pcount = nsamples / decimator->getFactor();
|
c@249
|
175 double *decout = new double[pcount];
|
c@249
|
176 decimator->process(samples, decout);
|
c@249
|
177 psource = decout;
|
c@249
|
178 }
|
c@249
|
179
|
c@249
|
180 int origin = 0;
|
c@249
|
181
|
c@249
|
182 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
|
c@249
|
183
|
c@249
|
184 int frames = 0;
|
c@249
|
185
|
c@249
|
186 double *frame = new double[fftsize];
|
c@249
|
187 double *real = new double[fftsize];
|
c@249
|
188 double *imag = new double[fftsize];
|
c@249
|
189 double *cqre = new double[ncoeff];
|
c@249
|
190 double *cqim = new double[ncoeff];
|
c@249
|
191
|
c@249
|
192 while (origin <= pcount) {
|
c@249
|
193
|
c@249
|
194 // always need at least one fft window per block, but after
|
c@249
|
195 // that we want to avoid having any incomplete ones
|
c@249
|
196 if (origin > 0 && origin + fftsize >= pcount) break;
|
c@249
|
197
|
c@249
|
198 for (int i = 0; i < fftsize; ++i) {
|
c@249
|
199 if (origin + i < pcount) {
|
c@249
|
200 frame[i] = psource[origin + i];
|
c@249
|
201 } else {
|
c@249
|
202 frame[i] = 0.0;
|
c@249
|
203 }
|
c@249
|
204 }
|
c@249
|
205
|
c@249
|
206 for (int i = 0; i < fftsize/2; ++i) {
|
c@249
|
207 double value = frame[i];
|
c@249
|
208 frame[i] = frame[i + fftsize/2];
|
c@249
|
209 frame[i + fftsize/2] = value;
|
c@249
|
210 }
|
c@249
|
211
|
c@249
|
212 window->cut(frame);
|
c@249
|
213
|
c@339
|
214 fft->forward(frame, real, imag);
|
c@249
|
215
|
c@249
|
216 constq->process(real, imag, cqre, cqim);
|
cannam@480
|
217
|
c@249
|
218 for (int i = 0; i < ncoeff; ++i) {
|
c@249
|
219 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
|
c@249
|
220 }
|
c@249
|
221 ++frames;
|
c@245
|
222
|
c@249
|
223 origin += fftsize/2;
|
c@249
|
224 }
|
c@245
|
225
|
c@249
|
226 delete [] cqre;
|
c@249
|
227 delete [] cqim;
|
c@249
|
228 delete [] real;
|
c@249
|
229 delete [] imag;
|
c@249
|
230 delete [] frame;
|
c@245
|
231
|
c@249
|
232 for (int i = 0; i < ncoeff; ++i) {
|
c@249
|
233 cq[i] /= frames;
|
c@249
|
234 }
|
c@245
|
235
|
c@249
|
236 if (decimator) delete[] psource;
|
c@245
|
237
|
c@249
|
238 features.push_back(cq);
|
c@243
|
239 }
|
c@243
|
240
|
c@251
|
241 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
|
c@251
|
242 {
|
c@251
|
243 if (!mfcc) {
|
c@251
|
244 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
|
c@251
|
245 << "No mfcc: initialise not called?"
|
c@251
|
246 << std::endl;
|
c@251
|
247 return;
|
c@251
|
248 }
|
c@251
|
249
|
c@251
|
250 if (nsamples < getWindowsize()) {
|
c@251
|
251 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
|
c@251
|
252 return;
|
c@251
|
253 }
|
c@251
|
254
|
c@251
|
255 int fftsize = mfcc->getfftlength();
|
c@251
|
256
|
c@251
|
257 vector<double> cc(ncoeff);
|
c@251
|
258
|
c@251
|
259 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
|
c@251
|
260
|
c@251
|
261 const double *psource = samples;
|
c@251
|
262 int pcount = nsamples;
|
c@251
|
263
|
c@252
|
264 if (decimator) {
|
c@252
|
265 pcount = nsamples / decimator->getFactor();
|
c@252
|
266 double *decout = new double[pcount];
|
c@252
|
267 decimator->process(samples, decout);
|
c@252
|
268 psource = decout;
|
c@252
|
269 }
|
c@252
|
270
|
c@251
|
271 int origin = 0;
|
c@251
|
272 int frames = 0;
|
c@251
|
273
|
c@251
|
274 double *frame = new double[fftsize];
|
c@251
|
275 double *ccout = new double[ncoeff];
|
c@251
|
276
|
c@251
|
277 while (origin <= pcount) {
|
c@251
|
278
|
c@251
|
279 // always need at least one fft window per block, but after
|
c@251
|
280 // that we want to avoid having any incomplete ones
|
c@251
|
281 if (origin > 0 && origin + fftsize >= pcount) break;
|
c@251
|
282
|
c@251
|
283 for (int i = 0; i < fftsize; ++i) {
|
c@251
|
284 if (origin + i < pcount) {
|
c@251
|
285 frame[i] = psource[origin + i];
|
c@251
|
286 } else {
|
c@251
|
287 frame[i] = 0.0;
|
c@251
|
288 }
|
c@251
|
289 }
|
c@251
|
290
|
c@255
|
291 mfcc->process(frame, ccout);
|
cannam@480
|
292
|
c@251
|
293 for (int i = 0; i < ncoeff; ++i) {
|
c@251
|
294 cc[i] += ccout[i];
|
c@251
|
295 }
|
c@251
|
296 ++frames;
|
c@251
|
297
|
c@251
|
298 origin += fftsize/2;
|
c@251
|
299 }
|
c@251
|
300
|
c@251
|
301 delete [] ccout;
|
c@251
|
302 delete [] frame;
|
c@251
|
303
|
c@251
|
304 for (int i = 0; i < ncoeff; ++i) {
|
c@251
|
305 cc[i] /= frames;
|
c@251
|
306 }
|
c@251
|
307
|
c@252
|
308 if (decimator) delete[] psource;
|
c@252
|
309
|
c@251
|
310 features.push_back(cc);
|
c@251
|
311 }
|
c@251
|
312
|
c@243
|
313 void ClusterMeltSegmenter::segment(int m)
|
c@243
|
314 {
|
c@249
|
315 nclusters = m;
|
c@249
|
316 segment();
|
c@243
|
317 }
|
c@243
|
318
|
c@243
|
319 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
|
c@243
|
320 {
|
c@249
|
321 features = f;
|
c@249
|
322 featureType = FEATURE_TYPE_UNKNOWN;
|
c@243
|
323 }
|
c@243
|
324
|
c@243
|
325 void ClusterMeltSegmenter::segment()
|
c@243
|
326 {
|
c@251
|
327 delete constq;
|
c@251
|
328 constq = 0;
|
c@251
|
329 delete mfcc;
|
c@251
|
330 mfcc = 0;
|
c@251
|
331 delete decimator;
|
c@251
|
332 decimator = 0;
|
c@283
|
333
|
c@414
|
334 int sz = features.size();
|
c@414
|
335
|
c@414
|
336 if (sz < histogramLength) return;
|
c@283
|
337 /*
|
c@249
|
338 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
|
c@249
|
339 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
|
c@283
|
340 */
|
c@249
|
341 // copy the features to a native array and use the existing C segmenter...
|
cannam@480
|
342 double** arrFeatures = new double*[features.size()];
|
cannam@480
|
343 for (int i = 0; i < sz; i++) {
|
c@249
|
344 if (featureType == FEATURE_TYPE_UNKNOWN) {
|
c@249
|
345 arrFeatures[i] = new double[features[0].size()];
|
c@414
|
346 for (int j = 0; j < int(features[0].size()); j++) {
|
c@414
|
347 arrFeatures[i][j] = features[i][j];
|
c@414
|
348 }
|
c@249
|
349 } else {
|
cannam@480
|
350 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
|
c@414
|
351 for (int j = 0; j < ncoeff; j++) {
|
c@414
|
352 arrFeatures[i][j] = features[i][j];
|
c@414
|
353 }
|
c@249
|
354 }
|
c@249
|
355 }
|
cannam@480
|
356
|
c@249
|
357 q = new int[features.size()];
|
cannam@480
|
358
|
c@251
|
359 if (featureType == FEATURE_TYPE_UNKNOWN ||
|
cannam@480
|
360 featureType == FEATURE_TYPE_MFCC) {
|
c@249
|
361 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
|
c@249
|
362 nclusters, neighbourhoodLimit);
|
cannam@480
|
363 } else {
|
c@249
|
364 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
|
c@249
|
365 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
|
cannam@480
|
366 }
|
cannam@480
|
367
|
c@249
|
368 // convert the cluster assignment sequence to a segmentation
|
cannam@480
|
369 makeSegmentation(q, features.size());
|
cannam@480
|
370
|
c@249
|
371 // de-allocate arrays
|
c@249
|
372 delete [] q;
|
c@414
|
373 for (int i = 0; i < int(features.size()); i++) delete [] arrFeatures[i];
|
c@249
|
374 delete [] arrFeatures;
|
cannam@480
|
375
|
c@249
|
376 // clear the features
|
c@249
|
377 clear();
|
c@243
|
378 }
|
c@243
|
379
|
c@243
|
380 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
|
c@243
|
381 {
|
c@249
|
382 segmentation.segments.clear();
|
c@249
|
383 segmentation.nsegtypes = nclusters;
|
c@249
|
384 segmentation.samplerate = samplerate;
|
cannam@480
|
385
|
c@249
|
386 Segment segment;
|
c@249
|
387 segment.start = 0;
|
c@249
|
388 segment.type = q[0];
|
cannam@480
|
389
|
cannam@480
|
390 for (int i = 1; i < len; i++) {
|
cannam@480
|
391 if (q[i] != q[i-1]) {
|
c@249
|
392 segment.end = i * getHopsize();
|
c@249
|
393 segmentation.segments.push_back(segment);
|
c@249
|
394 segment.type = q[i];
|
c@249
|
395 segment.start = segment.end;
|
c@249
|
396 }
|
c@249
|
397 }
|
c@249
|
398 segment.end = len * getHopsize();
|
c@249
|
399 segmentation.segments.push_back(segment);
|
c@243
|
400 }
|
c@243
|
401
|