comparison dsp/segmentation/ClusterMeltSegmenter.cpp @ 26:d096a79fa772

* Add timbral (MFCC) feature option to segmenter
author cannam
date Thu, 10 Jan 2008 16:41:33 +0000
parents 2b74bd60c61f
children b678e72323df
comparison
equal deleted inserted replaced
25:54a962727271 26:d096a79fa772
16 #include "segment.h" 16 #include "segment.h"
17 17
18 #include "dsp/transforms/FFT.h" 18 #include "dsp/transforms/FFT.h"
19 #include "dsp/chromagram/ConstantQ.h" 19 #include "dsp/chromagram/ConstantQ.h"
20 #include "dsp/rateconversion/Decimator.h" 20 #include "dsp/rateconversion/Decimator.h"
21 #include "dsp/mfcc/MFCC.h"
21 22
22 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : 23 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
23 window(NULL), 24 window(NULL),
24 constq(NULL), 25 constq(NULL),
26 mfcc(NULL),
25 featureType(params.featureType), 27 featureType(params.featureType),
26 hopSize(params.hopSize), 28 hopSize(params.hopSize),
27 windowSize(params.windowSize), 29 windowSize(params.windowSize),
28 fmin(params.fmin), 30 fmin(params.fmin),
29 fmax(params.fmax), 31 fmax(params.fmax),
31 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c 33 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
32 nHMMStates(params.nHMMStates), 34 nHMMStates(params.nHMMStates),
33 nclusters(params.nclusters), 35 nclusters(params.nclusters),
34 histogramLength(params.histogramLength), 36 histogramLength(params.histogramLength),
35 neighbourhoodLimit(params.neighbourhoodLimit), 37 neighbourhoodLimit(params.neighbourhoodLimit),
36 decimator(0) 38 decimator(NULL)
37 { 39 {
38 } 40 }
39 41
40 void ClusterMeltSegmenter::initialise(int fs) 42 void ClusterMeltSegmenter::initialise(int fs)
41 { 43 {
42 samplerate = fs; 44 samplerate = fs;
43 45
44 if (featureType != FEATURE_TYPE_UNKNOWN) 46 if (featureType == FEATURE_TYPE_CONSTQ ||
45 { 47 featureType == FEATURE_TYPE_CHROMA) {
46 // always run internal processing at 11025 or thereabouts 48
49 // run internal processing at 11025 or thereabouts
47 int internalRate = 11025; 50 int internalRate = 11025;
48 int decimationFactor = samplerate / internalRate; 51 int decimationFactor = samplerate / internalRate;
49 if (decimationFactor < 1) decimationFactor = 1; 52 if (decimationFactor < 1) decimationFactor = 1;
50 53
51 // must be a power of two 54 // must be a power of two
66 config.BPO = nbins; 69 config.BPO = nbins;
67 config.CQThresh = 0.0054; 70 config.CQThresh = 0.0054;
68 71
69 constq = new ConstantQ(config); 72 constq = new ConstantQ(config);
70 constq->sparsekernel(); 73 constq->sparsekernel();
71 74
72 ncoeff = constq->getK(); 75 ncoeff = constq->getK();
76
77 } else if (featureType == FEATURE_TYPE_MFCC) {
78
79 MFCCConfig config;
80 config.FS = samplerate;
81 config.fftsize = 1024;
82 config.nceps = 20;
83 config.want_c0 = false;
84
85 mfcc = new MFCC(config);
86 ncoeff = config.nceps;
73 } 87 }
74 } 88 }
75 89
76 ClusterMeltSegmenter::~ClusterMeltSegmenter() 90 ClusterMeltSegmenter::~ClusterMeltSegmenter()
77 { 91 {
81 } 95 }
82 96
83 int 97 int
84 ClusterMeltSegmenter::getWindowsize() 98 ClusterMeltSegmenter::getWindowsize()
85 { 99 {
86 if (featureType != FEATURE_TYPE_UNKNOWN) {
87
88 if (constq) {
89 /*
90 std::cerr << "ClusterMeltSegmenter::getWindowsize: "
91 << "rate = " << samplerate
92 << ", dec factor = " << (decimator ? decimator->getFactor() : 1)
93 << ", fft length = " << constq->getfftlength()
94 << ", fmin = " << fmin
95 << ", fmax = " << fmax
96 << ", nbins = " << nbins
97 << ", K = " << constq->getK()
98 << ", Q = " << constq->getQ()
99 << std::endl;
100 */
101 }
102 }
103
104 return static_cast<int>(windowSize * samplerate); 100 return static_cast<int>(windowSize * samplerate);
105 } 101 }
106 102
107 int 103 int
108 ClusterMeltSegmenter::getHopsize() 104 ClusterMeltSegmenter::getHopsize()
110 return static_cast<int>(hopSize * samplerate); 106 return static_cast<int>(hopSize * samplerate);
111 } 107 }
112 108
113 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) 109 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
114 { 110 {
111 if (featureType == FEATURE_TYPE_CONSTQ ||
112 featureType == FEATURE_TYPE_CHROMA) {
113 extractFeaturesConstQ(samples, nsamples);
114 } else if (featureType == FEATURE_TYPE_MFCC) {
115 extractFeaturesMFCC(samples, nsamples);
116 }
117 }
118
119 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
120 {
115 if (!constq) { 121 if (!constq) {
116 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: " 122 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
117 << "Cannot run unknown feature type (or initialise not called)" 123 << "No const-q: initialise not called?"
118 << std::endl; 124 << std::endl;
119 return; 125 return;
120 } 126 }
121 127
122 if (nsamples < getWindowsize()) { 128 if (nsamples < getWindowsize()) {
196 delete [] real; 202 delete [] real;
197 delete [] imag; 203 delete [] imag;
198 delete [] frame; 204 delete [] frame;
199 205
200 for (int i = 0; i < ncoeff; ++i) { 206 for (int i = 0; i < ncoeff; ++i) {
201 // std::cerr << cq[i] << " ";
202 cq[i] /= frames; 207 cq[i] /= frames;
203 } 208 }
204 // std::cerr << std::endl;
205 209
206 if (decimator) delete[] psource; 210 if (decimator) delete[] psource;
207 211
208 features.push_back(cq); 212 features.push_back(cq);
213 }
214
215 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
216 {
217 if (!mfcc) {
218 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
219 << "No mfcc: initialise not called?"
220 << std::endl;
221 return;
222 }
223
224 if (nsamples < getWindowsize()) {
225 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
226 return;
227 }
228
229 int fftsize = mfcc->getfftlength();
230
231 vector<double> cc(ncoeff);
232
233 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
234
235 const double *psource = samples;
236 int pcount = nsamples;
237
238 int origin = 0;
239 int frames = 0;
240
241 double *frame = new double[fftsize];
242 double *ccout = new double[ncoeff];
243
244 while (origin <= pcount) {
245
246 // always need at least one fft window per block, but after
247 // that we want to avoid having any incomplete ones
248 if (origin > 0 && origin + fftsize >= pcount) break;
249
250 for (int i = 0; i < fftsize; ++i) {
251 if (origin + i < pcount) {
252 frame[i] = psource[origin + i];
253 } else {
254 frame[i] = 0.0;
255 }
256 }
257
258 mfcc->process(fftsize, frame, ccout);
259
260 for (int i = 0; i < ncoeff; ++i) {
261 cc[i] += ccout[i];
262 }
263 ++frames;
264
265 origin += fftsize/2;
266 }
267
268 delete [] ccout;
269 delete [] frame;
270
271 for (int i = 0; i < ncoeff; ++i) {
272 cc[i] /= frames;
273 }
274
275 features.push_back(cc);
209 } 276 }
210 277
211 void ClusterMeltSegmenter::segment(int m) 278 void ClusterMeltSegmenter::segment(int m)
212 { 279 {
213 nclusters = m; 280 nclusters = m;
220 featureType = FEATURE_TYPE_UNKNOWN; 287 featureType = FEATURE_TYPE_UNKNOWN;
221 } 288 }
222 289
223 void ClusterMeltSegmenter::segment() 290 void ClusterMeltSegmenter::segment()
224 { 291 {
225 if (constq) 292 delete constq;
226 { 293 constq = 0;
227 delete constq; 294 delete mfcc;
228 constq = 0; 295 mfcc = 0;
229 delete decimator; 296 delete decimator;
230 decimator = 0; 297 decimator = 0;
231 }
232 298
233 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() 299 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
234 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; 300 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
235 301
236 // copy the features to a native array and use the existing C segmenter... 302 // copy the features to a native array and use the existing C segmenter...
248 } 314 }
249 } 315 }
250 316
251 q = new int[features.size()]; 317 q = new int[features.size()];
252 318
253 if (featureType == FEATURE_TYPE_UNKNOWN) 319 if (featureType == FEATURE_TYPE_UNKNOWN ||
320 featureType == FEATURE_TYPE_MFCC)
254 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, 321 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
255 nclusters, neighbourhoodLimit); 322 nclusters, neighbourhoodLimit);
256 else 323 else
257 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, 324 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
258 nHMMStates, histogramLength, nclusters, neighbourhoodLimit); 325 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);