Mercurial > hg > qm-dsp
comparison dsp/segmentation/ClusterMeltSegmenter.cpp @ 24:2b74bd60c61f
* Various fixes to segmentation code
author | cannam |
---|---|
date | Thu, 10 Jan 2008 15:14:53 +0000 |
parents | 8bdbda7fb893 |
children | d096a79fa772 |
comparison
equal
deleted
inserted
replaced
23:eea2a08a75a9 | 24:2b74bd60c61f |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
1 /* | 3 /* |
2 * ClusterMeltSegmenter.cpp | 4 * ClusterMeltSegmenter.cpp |
3 * soundbite | |
4 * | 5 * |
5 * Created by Mark Levy on 23/03/2006. | 6 * Created by Mark Levy on 23/03/2006. |
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. | 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. |
7 * | 8 * All rights reserved. |
8 */ | 9 */ |
9 | 10 |
10 #include <cfloat> | 11 #include <cfloat> |
11 #include <cmath> | 12 #include <cmath> |
12 | 13 |
13 #include "ClusterMeltSegmenter.h" | 14 #include "ClusterMeltSegmenter.h" |
14 #include "cluster_segmenter.h" | 15 #include "cluster_segmenter.h" |
15 #include "segment.h" | 16 #include "segment.h" |
16 | 17 |
17 #include "dsp/transforms/FFT.h" | 18 #include "dsp/transforms/FFT.h" |
18 | 19 #include "dsp/chromagram/ConstantQ.h" |
19 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : window(NULL), | 20 #include "dsp/rateconversion/Decimator.h" |
20 constq(NULL), | 21 |
21 featureType(params.featureType), | 22 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : |
22 hopSize(params.hopSize), | 23 window(NULL), |
23 windowSize(params.windowSize), | 24 constq(NULL), |
24 fmin(params.fmin), | 25 featureType(params.featureType), |
25 fmax(params.fmax), | 26 hopSize(params.hopSize), |
26 nbins(params.nbins), | 27 windowSize(params.windowSize), |
27 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c | 28 fmin(params.fmin), |
28 nHMMStates(params.nHMMStates), | 29 fmax(params.fmax), |
29 nclusters(params.nclusters), | 30 nbins(params.nbins), |
30 histogramLength(params.histogramLength), | 31 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c |
31 neighbourhoodLimit(params.neighbourhoodLimit) | 32 nHMMStates(params.nHMMStates), |
33 nclusters(params.nclusters), | |
34 histogramLength(params.histogramLength), | |
35 neighbourhoodLimit(params.neighbourhoodLimit), | |
36 decimator(0) | |
32 { | 37 { |
33 } | 38 } |
34 | 39 |
35 void ClusterMeltSegmenter::initialise(int fs) | 40 void ClusterMeltSegmenter::initialise(int fs) |
36 { | 41 { |
37 samplerate = fs; | 42 samplerate = fs; |
38 if (featureType != FEATURE_TYPE_UNKNOWN) | 43 |
39 { | 44 if (featureType != FEATURE_TYPE_UNKNOWN) |
40 //!!! ncoeff = static_cast<int>(ceil(nbins * (log(fmax / static_cast<double>(fmin))) / log(2.0))); | 45 { |
41 CQConfig config; | 46 // always run internal processing at 11025 or thereabouts |
42 config.FS = samplerate; | 47 int internalRate = 11025; |
43 config.min = fmin; | 48 int decimationFactor = samplerate / internalRate; |
44 config.max = fmax; | 49 if (decimationFactor < 1) decimationFactor = 1; |
45 config.BPO = nbins; | 50 |
46 config.CQThresh = 0.0054; | 51 // must be a power of two |
47 constq = new ConstantQ(config); | 52 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; |
48 //!!! constq = init_constQ(fmin, fmax, nbins, samplerate, ncoeff); | 53 |
49 ncoeff = constq->getK(); | 54 if (decimationFactor > Decimator::getHighestSupportedFactor()) { |
50 } | 55 decimationFactor = Decimator::getHighestSupportedFactor(); |
56 } | |
57 | |
58 if (decimationFactor > 1) { | |
59 decimator = new Decimator(getWindowsize(), decimationFactor); | |
60 } | |
61 | |
62 CQConfig config; | |
63 config.FS = samplerate / decimationFactor; | |
64 config.min = fmin; | |
65 config.max = fmax; | |
66 config.BPO = nbins; | |
67 config.CQThresh = 0.0054; | |
68 | |
69 constq = new ConstantQ(config); | |
70 constq->sparsekernel(); | |
71 | |
72 ncoeff = constq->getK(); | |
73 } | |
51 } | 74 } |
52 | 75 |
53 ClusterMeltSegmenter::~ClusterMeltSegmenter() | 76 ClusterMeltSegmenter::~ClusterMeltSegmenter() |
54 { | 77 { |
55 delete window; | 78 delete window; |
56 delete constq; | 79 delete constq; |
57 //!!! if (constq) | 80 delete decimator; |
58 // close_constQ(constq); | |
59 } | 81 } |
60 | 82 |
61 int | 83 int |
62 ClusterMeltSegmenter::getWindowsize() | 84 ClusterMeltSegmenter::getWindowsize() |
63 { | 85 { |
64 if (featureType != FEATURE_TYPE_UNKNOWN) { | 86 if (featureType != FEATURE_TYPE_UNKNOWN) { |
65 std::cerr << "rate = " << samplerate << ", fft length = " << constq->getfftlength() << ", fmin = " << fmin << ", fmax = " << fmax << ", nbins = " << nbins << ", K = " << constq->getK() << ", Q = " << constq->getQ() << std::endl; | 87 |
66 return constq->getfftlength(); | 88 if (constq) { |
67 } else { | 89 /* |
68 return static_cast<int>(windowSize * samplerate); | 90 std::cerr << "ClusterMeltSegmenter::getWindowsize: " |
69 } | 91 << "rate = " << samplerate |
92 << ", dec factor = " << (decimator ? decimator->getFactor() : 1) | |
93 << ", fft length = " << constq->getfftlength() | |
94 << ", fmin = " << fmin | |
95 << ", fmax = " << fmax | |
96 << ", nbins = " << nbins | |
97 << ", K = " << constq->getK() | |
98 << ", Q = " << constq->getQ() | |
99 << std::endl; | |
100 */ | |
101 } | |
102 } | |
103 | |
104 return static_cast<int>(windowSize * samplerate); | |
70 } | 105 } |
71 | 106 |
72 int | 107 int |
73 ClusterMeltSegmenter::getHopsize() | 108 ClusterMeltSegmenter::getHopsize() |
74 { | 109 { |
75 return static_cast<int>(hopSize * samplerate); | 110 return static_cast<int>(hopSize * samplerate); |
76 } | 111 } |
77 | 112 |
78 void ClusterMeltSegmenter::extractFeatures(double* samples, int nsamples) | 113 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) |
79 { | 114 { |
80 // create a new window if needed | 115 if (!constq) { |
81 /*!!! | 116 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: " |
82 if (!window || nsamples != windowLength) | 117 << "Cannot run unknown feature type (or initialise not called)" |
83 { | 118 << std::endl; |
84 if (window) | 119 return; |
85 delete [] window; | 120 } |
86 // Window<double>(HammingWindow, nsamples).cut | 121 |
87 //!!! window = hamming_p(nsamples); | 122 if (nsamples < getWindowsize()) { |
88 windowLength = nsamples; | 123 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; |
89 } | 124 return; |
90 */ | 125 } |
91 if (!window || window->getSize() != nsamples) { | 126 |
92 delete window; | 127 int fftsize = constq->getfftlength(); |
93 window = new Window<double>(HammingWindow, nsamples); | 128 |
94 } | 129 if (!window || window->getSize() != fftsize) { |
95 | 130 delete window; |
96 // copy the samples before windowing in case we need them for something else | 131 window = new Window<double>(HammingWindow, fftsize); |
97 double* frame = new double[nsamples]; | 132 } |
98 // for (int i = 0; i < nsamples; i++) | 133 |
99 // frame[i] = samples[i] * window[i]; | 134 vector<double> cq(ncoeff); |
100 window->cut(frame); | 135 |
101 | 136 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0; |
102 std::cerr << "nsamples = " << nsamples << std::endl; | 137 |
103 | 138 const double *psource = samples; |
104 double *real = new double[nsamples]; | 139 int pcount = nsamples; |
105 double *imag = new double[nsamples]; | 140 |
106 | 141 if (decimator) { |
107 FFT::process(nsamples, false, frame, 0, real, imag); | 142 pcount = nsamples / decimator->getFactor(); |
108 | 143 double *decout = new double[pcount]; |
109 double *cqre = new double[ncoeff]; | 144 decimator->process(samples, decout); |
110 double *cqim = new double[ncoeff]; | 145 psource = decout; |
111 | 146 } |
112 constq->process(real, imag, cqre, cqim); | 147 |
113 | 148 int origin = 0; |
114 // extract const-Q | 149 |
115 //!!! do_constQ(constq, frame, nsamples); | 150 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl; |
116 // int ncq = constq->ncoeff; | 151 |
117 | 152 int frames = 0; |
118 delete [] frame; | 153 |
119 delete [] real; | 154 double *frame = new double[fftsize]; |
120 delete [] imag; | 155 double *real = new double[fftsize]; |
121 | 156 double *imag = new double[fftsize]; |
122 //!!! if (ncq == ncoeff) // else feature extraction failed | 157 double *cqre = new double[ncoeff]; |
123 // { | 158 double *cqim = new double[ncoeff]; |
124 // vector<double> cq(ncq); | 159 |
125 // for (int i = 0; i < ncq; i++) | 160 while (origin <= pcount) { |
126 // cq[i] = constq->absconstQtransform[i]; | 161 |
127 vector<double> cq(ncoeff); | 162 // always need at least one fft window per block, but after |
128 for (int i = 0; i < ncoeff; ++i) { | 163 // that we want to avoid having any incomplete ones |
129 cq[i] = sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); | 164 if (origin > 0 && origin + fftsize >= pcount) break; |
130 } | 165 |
131 features.push_back(cq); | 166 for (int i = 0; i < fftsize; ++i) { |
132 // } | 167 if (origin + i < pcount) { |
133 | 168 frame[i] = psource[origin + i]; |
134 delete[] cqre; | 169 } else { |
135 delete[] cqim; | 170 frame[i] = 0.0; |
171 } | |
172 } | |
173 | |
174 for (int i = 0; i < fftsize/2; ++i) { | |
175 double value = frame[i]; | |
176 frame[i] = frame[i + fftsize/2]; | |
177 frame[i + fftsize/2] = value; | |
178 } | |
179 | |
180 window->cut(frame); | |
181 | |
182 FFT::process(fftsize, false, frame, 0, real, imag); | |
183 | |
184 constq->process(real, imag, cqre, cqim); | |
185 | |
186 for (int i = 0; i < ncoeff; ++i) { | |
187 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); | |
188 } | |
189 ++frames; | |
190 | |
191 origin += fftsize/2; | |
192 } | |
193 | |
194 delete [] cqre; | |
195 delete [] cqim; | |
196 delete [] real; | |
197 delete [] imag; | |
198 delete [] frame; | |
199 | |
200 for (int i = 0; i < ncoeff; ++i) { | |
201 // std::cerr << cq[i] << " "; | |
202 cq[i] /= frames; | |
203 } | |
204 // std::cerr << std::endl; | |
205 | |
206 if (decimator) delete[] psource; | |
207 | |
208 features.push_back(cq); | |
136 } | 209 } |
137 | 210 |
138 void ClusterMeltSegmenter::segment(int m) | 211 void ClusterMeltSegmenter::segment(int m) |
139 { | 212 { |
140 nclusters = m; | 213 nclusters = m; |
141 segment(); | 214 segment(); |
142 } | 215 } |
143 | 216 |
144 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f) | 217 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f) |
145 { | 218 { |
146 features = f; | 219 features = f; |
147 featureType = FEATURE_TYPE_UNKNOWN; | 220 featureType = FEATURE_TYPE_UNKNOWN; |
148 } | 221 } |
149 | 222 |
150 void ClusterMeltSegmenter::segment() | 223 void ClusterMeltSegmenter::segment() |
151 { | 224 { |
152 if (constq) | 225 if (constq) |
153 { | 226 { |
154 //!!! close_constQ(constq); // finished extracting features | 227 delete constq; |
155 delete constq; | 228 constq = 0; |
156 constq = NULL; | 229 delete decimator; |
157 } | 230 decimator = 0; |
158 | 231 } |
159 // for now copy the features to a native array and use the existing C segmenter... | 232 |
160 double** arrFeatures = new double*[features.size()]; | 233 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() |
161 for (int i = 0; i < features.size(); i++) | 234 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; |
162 { | 235 |
163 if (featureType == FEATURE_TYPE_UNKNOWN) | 236 // copy the features to a native array and use the existing C segmenter... |
164 arrFeatures[i] = new double[features[0].size()]; | 237 double** arrFeatures = new double*[features.size()]; |
165 else | 238 for (int i = 0; i < features.size(); i++) |
166 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope | 239 { |
167 for (int j = 0; j < ncoeff; j++) | 240 if (featureType == FEATURE_TYPE_UNKNOWN) { |
168 arrFeatures[i][j] = features[i][j]; | 241 arrFeatures[i] = new double[features[0].size()]; |
169 } | 242 for (int j = 0; j < features[0].size(); j++) |
170 | 243 arrFeatures[i][j] = features[i][j]; |
171 q = new int[features.size()]; | 244 } else { |
172 | 245 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope |
173 if (featureType == FEATURE_TYPE_UNKNOWN) | 246 for (int j = 0; j < ncoeff; j++) |
174 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, | 247 arrFeatures[i][j] = features[i][j]; |
175 nclusters, neighbourhoodLimit); | 248 } |
176 else | 249 } |
177 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, | 250 |
178 nHMMStates, histogramLength, nclusters, neighbourhoodLimit); | 251 q = new int[features.size()]; |
179 | 252 |
180 // convert the cluster assignment sequence to a segmentation | 253 if (featureType == FEATURE_TYPE_UNKNOWN) |
181 makeSegmentation(q, features.size()); | 254 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, |
182 | 255 nclusters, neighbourhoodLimit); |
183 // de-allocate arrays | 256 else |
184 delete [] q; | 257 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, |
185 for (int i = 0; i < features.size(); i++) | 258 nHMMStates, histogramLength, nclusters, neighbourhoodLimit); |
186 delete [] arrFeatures[i]; | 259 |
187 delete [] arrFeatures; | 260 // convert the cluster assignment sequence to a segmentation |
188 | 261 makeSegmentation(q, features.size()); |
189 // clear the features | 262 |
190 clear(); | 263 // de-allocate arrays |
264 delete [] q; | |
265 for (int i = 0; i < features.size(); i++) | |
266 delete [] arrFeatures[i]; | |
267 delete [] arrFeatures; | |
268 | |
269 // clear the features | |
270 clear(); | |
191 } | 271 } |
192 | 272 |
193 void ClusterMeltSegmenter::makeSegmentation(int* q, int len) | 273 void ClusterMeltSegmenter::makeSegmentation(int* q, int len) |
194 { | 274 { |
195 segmentation.segments.clear(); | 275 segmentation.segments.clear(); |
196 segmentation.nsegtypes = nclusters; | 276 segmentation.nsegtypes = nclusters; |
197 segmentation.samplerate = samplerate; | 277 segmentation.samplerate = samplerate; |
198 | 278 |
199 Segment segment; | 279 Segment segment; |
200 segment.start = 0; | 280 segment.start = 0; |
201 segment.type = q[0]; | 281 segment.type = q[0]; |
202 | 282 |
203 for (int i = 1; i < len; i++) | 283 for (int i = 1; i < len; i++) |
204 { | 284 { |
205 if (q[i] != q[i-1]) | 285 if (q[i] != q[i-1]) |
206 { | 286 { |
207 segment.end = i * getHopsize(); | 287 segment.end = i * getHopsize(); |
208 segmentation.segments.push_back(segment); | 288 segmentation.segments.push_back(segment); |
209 segment.type = q[i]; | 289 segment.type = q[i]; |
210 segment.start = segment.end; | 290 segment.start = segment.end; |
211 } | 291 } |
212 } | 292 } |
213 segment.end = len * getHopsize(); | 293 segment.end = len * getHopsize(); |
214 segmentation.segments.push_back(segment); | 294 segmentation.segments.push_back(segment); |
215 } | 295 } |
216 | 296 |
217 /* | |
218 void ClusterMeltSegmenter::mpeg7ConstQ() | |
219 { | |
220 // convert to dB scale | |
221 for (int i = 0; i < features.size(); i++) | |
222 for (int j = 0; j < ncoeff; j++) | |
223 features[i][j] = 10.0 * log10(features[i][j] + DBL_EPSILON); | |
224 | |
225 // normalise features and add the norm at the end as an extra feature dimension | |
226 double maxnorm = 0; // track the max of the norms | |
227 for (int i = 0; i < features.size(); i++) | |
228 { | |
229 double norm = 0; | |
230 for (int j = 0; j < ncoeff; j++) | |
231 norm += features[i][j] * features[i][j]; | |
232 norm = sqrt(norm); | |
233 for (int j = 0; j < ncoeff; j++) | |
234 features[i][j] /= norm; | |
235 features[i].push_back(norm); | |
236 if (norm > maxnorm) | |
237 maxnorm = norm; | |
238 } | |
239 | |
240 // normalise the norms | |
241 for (int i = 0; i < features.size(); i++) | |
242 features[i][ncoeff] /= maxnorm; | |
243 } | |
244 */ |