cannam@24
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@24
|
2
|
cannam@18
|
3 /*
|
cannam@24
|
4 * ClusterMeltSegmenter.cpp
|
cannam@18
|
5 *
|
cannam@24
|
6 * Created by Mark Levy on 23/03/2006.
|
cannam@24
|
7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
|
cannam@24
|
8 * All rights reserved.
|
cannam@18
|
9 */
|
cannam@18
|
10
|
cannam@18
|
11 #include <cfloat>
|
cannam@18
|
12 #include <cmath>
|
cannam@18
|
13
|
cannam@18
|
14 #include "ClusterMeltSegmenter.h"
|
cannam@18
|
15 #include "cluster_segmenter.h"
|
cannam@18
|
16 #include "segment.h"
|
cannam@18
|
17
|
cannam@20
|
18 #include "dsp/transforms/FFT.h"
|
cannam@24
|
19 #include "dsp/chromagram/ConstantQ.h"
|
cannam@24
|
20 #include "dsp/rateconversion/Decimator.h"
|
cannam@26
|
21 #include "dsp/mfcc/MFCC.h"
|
cannam@20
|
22
|
cannam@24
|
23 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
|
cannam@24
|
24 window(NULL),
|
cannam@24
|
25 constq(NULL),
|
cannam@26
|
26 mfcc(NULL),
|
cannam@24
|
27 featureType(params.featureType),
|
cannam@24
|
28 hopSize(params.hopSize),
|
cannam@24
|
29 windowSize(params.windowSize),
|
cannam@24
|
30 fmin(params.fmin),
|
cannam@24
|
31 fmax(params.fmax),
|
cannam@24
|
32 nbins(params.nbins),
|
cannam@24
|
33 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
|
cannam@24
|
34 nHMMStates(params.nHMMStates),
|
cannam@24
|
35 nclusters(params.nclusters),
|
cannam@24
|
36 histogramLength(params.histogramLength),
|
cannam@24
|
37 neighbourhoodLimit(params.neighbourhoodLimit),
|
cannam@26
|
38 decimator(NULL)
|
cannam@18
|
39 {
|
cannam@18
|
40 }
|
cannam@18
|
41
|
cannam@18
|
42 void ClusterMeltSegmenter::initialise(int fs)
|
cannam@18
|
43 {
|
cannam@24
|
44 samplerate = fs;
|
cannam@24
|
45
|
cannam@26
|
46 if (featureType == FEATURE_TYPE_CONSTQ ||
|
cannam@26
|
47 featureType == FEATURE_TYPE_CHROMA) {
|
cannam@26
|
48
|
cannam@26
|
49 // run internal processing at 11025 or thereabouts
|
cannam@24
|
50 int internalRate = 11025;
|
cannam@24
|
51 int decimationFactor = samplerate / internalRate;
|
cannam@24
|
52 if (decimationFactor < 1) decimationFactor = 1;
|
cannam@24
|
53
|
cannam@24
|
54 // must be a power of two
|
cannam@24
|
55 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
|
cannam@24
|
56
|
cannam@24
|
57 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
|
cannam@24
|
58 decimationFactor = Decimator::getHighestSupportedFactor();
|
cannam@24
|
59 }
|
cannam@24
|
60
|
cannam@24
|
61 if (decimationFactor > 1) {
|
cannam@24
|
62 decimator = new Decimator(getWindowsize(), decimationFactor);
|
cannam@24
|
63 }
|
cannam@24
|
64
|
cannam@24
|
65 CQConfig config;
|
cannam@24
|
66 config.FS = samplerate / decimationFactor;
|
cannam@24
|
67 config.min = fmin;
|
cannam@24
|
68 config.max = fmax;
|
cannam@24
|
69 config.BPO = nbins;
|
cannam@24
|
70 config.CQThresh = 0.0054;
|
cannam@24
|
71
|
cannam@24
|
72 constq = new ConstantQ(config);
|
cannam@24
|
73 constq->sparsekernel();
|
cannam@26
|
74
|
cannam@26
|
75 ncoeff = constq->getK();
|
cannam@26
|
76
|
cannam@26
|
77 } else if (featureType == FEATURE_TYPE_MFCC) {
|
cannam@24
|
78
|
cannam@26
|
79 MFCCConfig config;
|
cannam@26
|
80 config.FS = samplerate;
|
cannam@26
|
81 config.fftsize = 1024;
|
cannam@26
|
82 config.nceps = 20;
|
cannam@26
|
83 config.want_c0 = false;
|
cannam@26
|
84
|
cannam@26
|
85 mfcc = new MFCC(config);
|
cannam@26
|
86 ncoeff = config.nceps;
|
cannam@24
|
87 }
|
cannam@18
|
88 }
|
cannam@18
|
89
|
cannam@18
|
90 ClusterMeltSegmenter::~ClusterMeltSegmenter()
|
cannam@18
|
91 {
|
cannam@24
|
92 delete window;
|
cannam@24
|
93 delete constq;
|
cannam@24
|
94 delete decimator;
|
cannam@20
|
95 }
|
cannam@20
|
96
|
cannam@20
|
97 int
|
cannam@20
|
98 ClusterMeltSegmenter::getWindowsize()
|
cannam@20
|
99 {
|
cannam@24
|
100 return static_cast<int>(windowSize * samplerate);
|
cannam@20
|
101 }
|
cannam@20
|
102
|
cannam@20
|
103 int
|
cannam@20
|
104 ClusterMeltSegmenter::getHopsize()
|
cannam@20
|
105 {
|
cannam@24
|
106 return static_cast<int>(hopSize * samplerate);
|
cannam@18
|
107 }
|
cannam@18
|
108
|
cannam@24
|
109 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
|
cannam@18
|
110 {
|
cannam@26
|
111 if (featureType == FEATURE_TYPE_CONSTQ ||
|
cannam@26
|
112 featureType == FEATURE_TYPE_CHROMA) {
|
cannam@26
|
113 extractFeaturesConstQ(samples, nsamples);
|
cannam@26
|
114 } else if (featureType == FEATURE_TYPE_MFCC) {
|
cannam@26
|
115 extractFeaturesMFCC(samples, nsamples);
|
cannam@26
|
116 }
|
cannam@26
|
117 }
|
cannam@26
|
118
|
cannam@26
|
119 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
|
cannam@26
|
120 {
|
cannam@24
|
121 if (!constq) {
|
cannam@26
|
122 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
|
cannam@26
|
123 << "No const-q: initialise not called?"
|
cannam@24
|
124 << std::endl;
|
cannam@24
|
125 return;
|
cannam@24
|
126 }
|
cannam@20
|
127
|
cannam@24
|
128 if (nsamples < getWindowsize()) {
|
cannam@24
|
129 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
|
cannam@24
|
130 return;
|
cannam@24
|
131 }
|
cannam@24
|
132
|
cannam@24
|
133 int fftsize = constq->getfftlength();
|
cannam@24
|
134
|
cannam@24
|
135 if (!window || window->getSize() != fftsize) {
|
cannam@24
|
136 delete window;
|
cannam@24
|
137 window = new Window<double>(HammingWindow, fftsize);
|
cannam@24
|
138 }
|
cannam@24
|
139
|
cannam@24
|
140 vector<double> cq(ncoeff);
|
cannam@24
|
141
|
cannam@24
|
142 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
|
cannam@24
|
143
|
cannam@24
|
144 const double *psource = samples;
|
cannam@24
|
145 int pcount = nsamples;
|
cannam@24
|
146
|
cannam@24
|
147 if (decimator) {
|
cannam@24
|
148 pcount = nsamples / decimator->getFactor();
|
cannam@24
|
149 double *decout = new double[pcount];
|
cannam@24
|
150 decimator->process(samples, decout);
|
cannam@24
|
151 psource = decout;
|
cannam@24
|
152 }
|
cannam@24
|
153
|
cannam@24
|
154 int origin = 0;
|
cannam@24
|
155
|
cannam@24
|
156 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
|
cannam@24
|
157
|
cannam@24
|
158 int frames = 0;
|
cannam@24
|
159
|
cannam@24
|
160 double *frame = new double[fftsize];
|
cannam@24
|
161 double *real = new double[fftsize];
|
cannam@24
|
162 double *imag = new double[fftsize];
|
cannam@24
|
163 double *cqre = new double[ncoeff];
|
cannam@24
|
164 double *cqim = new double[ncoeff];
|
cannam@24
|
165
|
cannam@24
|
166 while (origin <= pcount) {
|
cannam@24
|
167
|
cannam@24
|
168 // always need at least one fft window per block, but after
|
cannam@24
|
169 // that we want to avoid having any incomplete ones
|
cannam@24
|
170 if (origin > 0 && origin + fftsize >= pcount) break;
|
cannam@24
|
171
|
cannam@24
|
172 for (int i = 0; i < fftsize; ++i) {
|
cannam@24
|
173 if (origin + i < pcount) {
|
cannam@24
|
174 frame[i] = psource[origin + i];
|
cannam@24
|
175 } else {
|
cannam@24
|
176 frame[i] = 0.0;
|
cannam@24
|
177 }
|
cannam@24
|
178 }
|
cannam@24
|
179
|
cannam@24
|
180 for (int i = 0; i < fftsize/2; ++i) {
|
cannam@24
|
181 double value = frame[i];
|
cannam@24
|
182 frame[i] = frame[i + fftsize/2];
|
cannam@24
|
183 frame[i + fftsize/2] = value;
|
cannam@24
|
184 }
|
cannam@24
|
185
|
cannam@24
|
186 window->cut(frame);
|
cannam@24
|
187
|
cannam@24
|
188 FFT::process(fftsize, false, frame, 0, real, imag);
|
cannam@24
|
189
|
cannam@24
|
190 constq->process(real, imag, cqre, cqim);
|
cannam@18
|
191
|
cannam@24
|
192 for (int i = 0; i < ncoeff; ++i) {
|
cannam@24
|
193 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
|
cannam@24
|
194 }
|
cannam@24
|
195 ++frames;
|
cannam@20
|
196
|
cannam@24
|
197 origin += fftsize/2;
|
cannam@24
|
198 }
|
cannam@20
|
199
|
cannam@24
|
200 delete [] cqre;
|
cannam@24
|
201 delete [] cqim;
|
cannam@24
|
202 delete [] real;
|
cannam@24
|
203 delete [] imag;
|
cannam@24
|
204 delete [] frame;
|
cannam@20
|
205
|
cannam@24
|
206 for (int i = 0; i < ncoeff; ++i) {
|
cannam@24
|
207 cq[i] /= frames;
|
cannam@24
|
208 }
|
cannam@20
|
209
|
cannam@24
|
210 if (decimator) delete[] psource;
|
cannam@20
|
211
|
cannam@24
|
212 features.push_back(cq);
|
cannam@18
|
213 }
|
cannam@18
|
214
|
cannam@26
|
215 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
|
cannam@26
|
216 {
|
cannam@26
|
217 if (!mfcc) {
|
cannam@26
|
218 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
|
cannam@26
|
219 << "No mfcc: initialise not called?"
|
cannam@26
|
220 << std::endl;
|
cannam@26
|
221 return;
|
cannam@26
|
222 }
|
cannam@26
|
223
|
cannam@26
|
224 if (nsamples < getWindowsize()) {
|
cannam@26
|
225 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
|
cannam@26
|
226 return;
|
cannam@26
|
227 }
|
cannam@26
|
228
|
cannam@26
|
229 int fftsize = mfcc->getfftlength();
|
cannam@26
|
230
|
cannam@26
|
231 vector<double> cc(ncoeff);
|
cannam@26
|
232
|
cannam@26
|
233 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
|
cannam@26
|
234
|
cannam@26
|
235 const double *psource = samples;
|
cannam@26
|
236 int pcount = nsamples;
|
cannam@26
|
237
|
cannam@26
|
238 int origin = 0;
|
cannam@26
|
239 int frames = 0;
|
cannam@26
|
240
|
cannam@26
|
241 double *frame = new double[fftsize];
|
cannam@26
|
242 double *ccout = new double[ncoeff];
|
cannam@26
|
243
|
cannam@26
|
244 while (origin <= pcount) {
|
cannam@26
|
245
|
cannam@26
|
246 // always need at least one fft window per block, but after
|
cannam@26
|
247 // that we want to avoid having any incomplete ones
|
cannam@26
|
248 if (origin > 0 && origin + fftsize >= pcount) break;
|
cannam@26
|
249
|
cannam@26
|
250 for (int i = 0; i < fftsize; ++i) {
|
cannam@26
|
251 if (origin + i < pcount) {
|
cannam@26
|
252 frame[i] = psource[origin + i];
|
cannam@26
|
253 } else {
|
cannam@26
|
254 frame[i] = 0.0;
|
cannam@26
|
255 }
|
cannam@26
|
256 }
|
cannam@26
|
257
|
cannam@26
|
258 mfcc->process(fftsize, frame, ccout);
|
cannam@26
|
259
|
cannam@26
|
260 for (int i = 0; i < ncoeff; ++i) {
|
cannam@26
|
261 cc[i] += ccout[i];
|
cannam@26
|
262 }
|
cannam@26
|
263 ++frames;
|
cannam@26
|
264
|
cannam@26
|
265 origin += fftsize/2;
|
cannam@26
|
266 }
|
cannam@26
|
267
|
cannam@26
|
268 delete [] ccout;
|
cannam@26
|
269 delete [] frame;
|
cannam@26
|
270
|
cannam@26
|
271 for (int i = 0; i < ncoeff; ++i) {
|
cannam@26
|
272 cc[i] /= frames;
|
cannam@26
|
273 }
|
cannam@26
|
274
|
cannam@26
|
275 features.push_back(cc);
|
cannam@26
|
276 }
|
cannam@26
|
277
|
cannam@18
|
278 void ClusterMeltSegmenter::segment(int m)
|
cannam@18
|
279 {
|
cannam@24
|
280 nclusters = m;
|
cannam@24
|
281 segment();
|
cannam@18
|
282 }
|
cannam@18
|
283
|
cannam@18
|
284 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
|
cannam@18
|
285 {
|
cannam@24
|
286 features = f;
|
cannam@24
|
287 featureType = FEATURE_TYPE_UNKNOWN;
|
cannam@18
|
288 }
|
cannam@18
|
289
|
cannam@18
|
290 void ClusterMeltSegmenter::segment()
|
cannam@18
|
291 {
|
cannam@26
|
292 delete constq;
|
cannam@26
|
293 constq = 0;
|
cannam@26
|
294 delete mfcc;
|
cannam@26
|
295 mfcc = 0;
|
cannam@26
|
296 delete decimator;
|
cannam@26
|
297 decimator = 0;
|
cannam@18
|
298
|
cannam@24
|
299 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
|
cannam@24
|
300 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
|
cannam@24
|
301
|
cannam@24
|
302 // copy the features to a native array and use the existing C segmenter...
|
cannam@24
|
303 double** arrFeatures = new double*[features.size()];
|
cannam@24
|
304 for (int i = 0; i < features.size(); i++)
|
cannam@24
|
305 {
|
cannam@24
|
306 if (featureType == FEATURE_TYPE_UNKNOWN) {
|
cannam@24
|
307 arrFeatures[i] = new double[features[0].size()];
|
cannam@24
|
308 for (int j = 0; j < features[0].size(); j++)
|
cannam@24
|
309 arrFeatures[i][j] = features[i][j];
|
cannam@24
|
310 } else {
|
cannam@24
|
311 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
|
cannam@24
|
312 for (int j = 0; j < ncoeff; j++)
|
cannam@24
|
313 arrFeatures[i][j] = features[i][j];
|
cannam@24
|
314 }
|
cannam@24
|
315 }
|
cannam@18
|
316
|
cannam@24
|
317 q = new int[features.size()];
|
cannam@18
|
318
|
cannam@26
|
319 if (featureType == FEATURE_TYPE_UNKNOWN ||
|
cannam@26
|
320 featureType == FEATURE_TYPE_MFCC)
|
cannam@24
|
321 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
|
cannam@24
|
322 nclusters, neighbourhoodLimit);
|
cannam@24
|
323 else
|
cannam@24
|
324 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
|
cannam@24
|
325 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
|
cannam@18
|
326
|
cannam@24
|
327 // convert the cluster assignment sequence to a segmentation
|
cannam@24
|
328 makeSegmentation(q, features.size());
|
cannam@18
|
329
|
cannam@24
|
330 // de-allocate arrays
|
cannam@24
|
331 delete [] q;
|
cannam@24
|
332 for (int i = 0; i < features.size(); i++)
|
cannam@24
|
333 delete [] arrFeatures[i];
|
cannam@24
|
334 delete [] arrFeatures;
|
cannam@18
|
335
|
cannam@24
|
336 // clear the features
|
cannam@24
|
337 clear();
|
cannam@18
|
338 }
|
cannam@18
|
339
|
cannam@18
|
340 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
|
cannam@18
|
341 {
|
cannam@24
|
342 segmentation.segments.clear();
|
cannam@24
|
343 segmentation.nsegtypes = nclusters;
|
cannam@24
|
344 segmentation.samplerate = samplerate;
|
cannam@18
|
345
|
cannam@24
|
346 Segment segment;
|
cannam@24
|
347 segment.start = 0;
|
cannam@24
|
348 segment.type = q[0];
|
cannam@18
|
349
|
cannam@24
|
350 for (int i = 1; i < len; i++)
|
cannam@24
|
351 {
|
cannam@24
|
352 if (q[i] != q[i-1])
|
cannam@24
|
353 {
|
cannam@24
|
354 segment.end = i * getHopsize();
|
cannam@24
|
355 segmentation.segments.push_back(segment);
|
cannam@24
|
356 segment.type = q[i];
|
cannam@24
|
357 segment.start = segment.end;
|
cannam@24
|
358 }
|
cannam@24
|
359 }
|
cannam@24
|
360 segment.end = len * getHopsize();
|
cannam@24
|
361 segmentation.segments.push_back(segment);
|
cannam@18
|
362 }
|
cannam@18
|
363
|