Mercurial > hg > qm-dsp
comparison dsp/segmentation/ClusterMeltSegmenter.cpp @ 251:c3600d3cfe5c
* Add timbral (MFCC) feature option to segmenter
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Thu, 10 Jan 2008 16:41:33 +0000 |
parents | 18a0dffa5c1a |
children | b678e72323df |
comparison
equal
deleted
inserted
replaced
250:a106e551e9a4 | 251:c3600d3cfe5c |
---|---|
16 #include "segment.h" | 16 #include "segment.h" |
17 | 17 |
18 #include "dsp/transforms/FFT.h" | 18 #include "dsp/transforms/FFT.h" |
19 #include "dsp/chromagram/ConstantQ.h" | 19 #include "dsp/chromagram/ConstantQ.h" |
20 #include "dsp/rateconversion/Decimator.h" | 20 #include "dsp/rateconversion/Decimator.h" |
21 #include "dsp/mfcc/MFCC.h" | |
21 | 22 |
22 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : | 23 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : |
23 window(NULL), | 24 window(NULL), |
24 constq(NULL), | 25 constq(NULL), |
26 mfcc(NULL), | |
25 featureType(params.featureType), | 27 featureType(params.featureType), |
26 hopSize(params.hopSize), | 28 hopSize(params.hopSize), |
27 windowSize(params.windowSize), | 29 windowSize(params.windowSize), |
28 fmin(params.fmin), | 30 fmin(params.fmin), |
29 fmax(params.fmax), | 31 fmax(params.fmax), |
31 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluster_segmenter.c | 33 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluster_segmenter.c |
32 nHMMStates(params.nHMMStates), | 34 nHMMStates(params.nHMMStates), |
33 nclusters(params.nclusters), | 35 nclusters(params.nclusters), |
34 histogramLength(params.histogramLength), | 36 histogramLength(params.histogramLength), |
35 neighbourhoodLimit(params.neighbourhoodLimit), | 37 neighbourhoodLimit(params.neighbourhoodLimit), |
36 decimator(0) | 38 decimator(NULL) |
37 { | 39 { |
38 } | 40 } |
39 | 41 |
40 void ClusterMeltSegmenter::initialise(int fs) | 42 void ClusterMeltSegmenter::initialise(int fs) |
41 { | 43 { |
42 samplerate = fs; | 44 samplerate = fs; |
43 | 45 |
44 if (featureType != FEATURE_TYPE_UNKNOWN) | 46 if (featureType == FEATURE_TYPE_CONSTQ || |
45 { | 47 featureType == FEATURE_TYPE_CHROMA) { |
46 // always run internal processing at 11025 or thereabouts | 48 |
49 // run internal processing at 11025 or thereabouts | |
47 int internalRate = 11025; | 50 int internalRate = 11025; |
48 int decimationFactor = samplerate / internalRate; | 51 int decimationFactor = samplerate / internalRate; |
49 if (decimationFactor < 1) decimationFactor = 1; | 52 if (decimationFactor < 1) decimationFactor = 1; |
50 | 53 |
51 // must be a power of two | 54 // must be a power of two |
66 config.BPO = nbins; | 69 config.BPO = nbins; |
67 config.CQThresh = 0.0054; | 70 config.CQThresh = 0.0054; |
68 | 71 |
69 constq = new ConstantQ(config); | 72 constq = new ConstantQ(config); |
70 constq->sparsekernel(); | 73 constq->sparsekernel(); |
71 | 74 |
72 ncoeff = constq->getK(); | 75 ncoeff = constq->getK(); |
76 | |
77 } else if (featureType == FEATURE_TYPE_MFCC) { | |
78 | |
79 MFCCConfig config; | |
80 config.FS = samplerate; | |
81 config.fftsize = 1024; | |
82 config.nceps = 20; | |
83 config.want_c0 = false; | |
84 | |
85 mfcc = new MFCC(config); | |
86 ncoeff = config.nceps; | |
73 } | 87 } |
74 } | 88 } |
75 | 89 |
76 ClusterMeltSegmenter::~ClusterMeltSegmenter() | 90 ClusterMeltSegmenter::~ClusterMeltSegmenter() |
77 { | 91 { |
81 } | 95 } |
82 | 96 |
83 int | 97 int |
84 ClusterMeltSegmenter::getWindowsize() | 98 ClusterMeltSegmenter::getWindowsize() |
85 { | 99 { |
86 if (featureType != FEATURE_TYPE_UNKNOWN) { | |
87 | |
88 if (constq) { | |
89 /* | |
90 std::cerr << "ClusterMeltSegmenter::getWindowsize: " | |
91 << "rate = " << samplerate | |
92 << ", dec factor = " << (decimator ? decimator->getFactor() : 1) | |
93 << ", fft length = " << constq->getfftlength() | |
94 << ", fmin = " << fmin | |
95 << ", fmax = " << fmax | |
96 << ", nbins = " << nbins | |
97 << ", K = " << constq->getK() | |
98 << ", Q = " << constq->getQ() | |
99 << std::endl; | |
100 */ | |
101 } | |
102 } | |
103 | |
104 return static_cast<int>(windowSize * samplerate); | 100 return static_cast<int>(windowSize * samplerate); |
105 } | 101 } |
106 | 102 |
107 int | 103 int |
108 ClusterMeltSegmenter::getHopsize() | 104 ClusterMeltSegmenter::getHopsize() |
110 return static_cast<int>(hopSize * samplerate); | 106 return static_cast<int>(hopSize * samplerate); |
111 } | 107 } |
112 | 108 |
113 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) | 109 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) |
114 { | 110 { |
111 if (featureType == FEATURE_TYPE_CONSTQ || | |
112 featureType == FEATURE_TYPE_CHROMA) { | |
113 extractFeaturesConstQ(samples, nsamples); | |
114 } else if (featureType == FEATURE_TYPE_MFCC) { | |
115 extractFeaturesMFCC(samples, nsamples); | |
116 } | |
117 } | |
118 | |
119 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples) | |
120 { | |
115 if (!constq) { | 121 if (!constq) { |
116 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: " | 122 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: " |
117 << "Cannot run unknown feature type (or initialise not called)" | 123 << "No const-q: initialise not called?" |
118 << std::endl; | 124 << std::endl; |
119 return; | 125 return; |
120 } | 126 } |
121 | 127 |
122 if (nsamples < getWindowsize()) { | 128 if (nsamples < getWindowsize()) { |
196 delete [] real; | 202 delete [] real; |
197 delete [] imag; | 203 delete [] imag; |
198 delete [] frame; | 204 delete [] frame; |
199 | 205 |
200 for (int i = 0; i < ncoeff; ++i) { | 206 for (int i = 0; i < ncoeff; ++i) { |
201 // std::cerr << cq[i] << " "; | |
202 cq[i] /= frames; | 207 cq[i] /= frames; |
203 } | 208 } |
204 // std::cerr << std::endl; | |
205 | 209 |
206 if (decimator) delete[] psource; | 210 if (decimator) delete[] psource; |
207 | 211 |
208 features.push_back(cq); | 212 features.push_back(cq); |
213 } | |
214 | |
215 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples) | |
216 { | |
217 if (!mfcc) { | |
218 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: " | |
219 << "No mfcc: initialise not called?" | |
220 << std::endl; | |
221 return; | |
222 } | |
223 | |
224 if (nsamples < getWindowsize()) { | |
225 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; | |
226 return; | |
227 } | |
228 | |
229 int fftsize = mfcc->getfftlength(); | |
230 | |
231 vector<double> cc(ncoeff); | |
232 | |
233 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0; | |
234 | |
235 const double *psource = samples; | |
236 int pcount = nsamples; | |
237 | |
238 int origin = 0; | |
239 int frames = 0; | |
240 | |
241 double *frame = new double[fftsize]; | |
242 double *ccout = new double[ncoeff]; | |
243 | |
244 while (origin <= pcount) { | |
245 | |
246 // always need at least one fft window per block, but after | |
247 // that we want to avoid having any incomplete ones | |
248 if (origin > 0 && origin + fftsize >= pcount) break; | |
249 | |
250 for (int i = 0; i < fftsize; ++i) { | |
251 if (origin + i < pcount) { | |
252 frame[i] = psource[origin + i]; | |
253 } else { | |
254 frame[i] = 0.0; | |
255 } | |
256 } | |
257 | |
258 mfcc->process(fftsize, frame, ccout); | |
259 | |
260 for (int i = 0; i < ncoeff; ++i) { | |
261 cc[i] += ccout[i]; | |
262 } | |
263 ++frames; | |
264 | |
265 origin += fftsize/2; | |
266 } | |
267 | |
268 delete [] ccout; | |
269 delete [] frame; | |
270 | |
271 for (int i = 0; i < ncoeff; ++i) { | |
272 cc[i] /= frames; | |
273 } | |
274 | |
275 features.push_back(cc); | |
209 } | 276 } |
210 | 277 |
211 void ClusterMeltSegmenter::segment(int m) | 278 void ClusterMeltSegmenter::segment(int m) |
212 { | 279 { |
213 nclusters = m; | 280 nclusters = m; |
220 featureType = FEATURE_TYPE_UNKNOWN; | 287 featureType = FEATURE_TYPE_UNKNOWN; |
221 } | 288 } |
222 | 289 |
223 void ClusterMeltSegmenter::segment() | 290 void ClusterMeltSegmenter::segment() |
224 { | 291 { |
225 if (constq) | 292 delete constq; |
226 { | 293 constq = 0; |
227 delete constq; | 294 delete mfcc; |
228 constq = 0; | 295 mfcc = 0; |
229 delete decimator; | 296 delete decimator; |
230 decimator = 0; | 297 decimator = 0; |
231 } | |
232 | 298 |
233 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() | 299 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() |
234 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; | 300 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; |
235 | 301 |
236 // copy the features to a native array and use the existing C segmenter... | 302 // copy the features to a native array and use the existing C segmenter... |
248 } | 314 } |
249 } | 315 } |
250 | 316 |
251 q = new int[features.size()]; | 317 q = new int[features.size()]; |
252 | 318 |
253 if (featureType == FEATURE_TYPE_UNKNOWN) | 319 if (featureType == FEATURE_TYPE_UNKNOWN || |
320 featureType == FEATURE_TYPE_MFCC) | |
254 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, | 321 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, |
255 nclusters, neighbourhoodLimit); | 322 nclusters, neighbourhoodLimit); |
256 else | 323 else |
257 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, | 324 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, |
258 nHMMStates, histogramLength, nclusters, neighbourhoodLimit); | 325 nHMMStates, histogramLength, nclusters, neighbourhoodLimit); |