comparison dsp/segmentation/ClusterMeltSegmenter.cpp @ 24:2b74bd60c61f

* Various fixes to segmentation code
author cannam
date Thu, 10 Jan 2008 15:14:53 +0000
parents 8bdbda7fb893
children d096a79fa772
comparison
equal deleted inserted replaced
23:eea2a08a75a9 24:2b74bd60c61f
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
1 /* 3 /*
2 * ClusterMeltSegmenter.cpp 4 * ClusterMeltSegmenter.cpp
3 * soundbite
4 * 5 *
5 * Created by Mark Levy on 23/03/2006. 6 * Created by Mark Levy on 23/03/2006.
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
7 * 8 * All rights reserved.
8 */ 9 */
9 10
10 #include <cfloat> 11 #include <cfloat>
11 #include <cmath> 12 #include <cmath>
12 13
13 #include "ClusterMeltSegmenter.h" 14 #include "ClusterMeltSegmenter.h"
14 #include "cluster_segmenter.h" 15 #include "cluster_segmenter.h"
15 #include "segment.h" 16 #include "segment.h"
16 17
17 #include "dsp/transforms/FFT.h" 18 #include "dsp/transforms/FFT.h"
18 19 #include "dsp/chromagram/ConstantQ.h"
19 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : window(NULL), 20 #include "dsp/rateconversion/Decimator.h"
20 constq(NULL), 21
21 featureType(params.featureType), 22 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
22 hopSize(params.hopSize), 23 window(NULL),
23 windowSize(params.windowSize), 24 constq(NULL),
24 fmin(params.fmin), 25 featureType(params.featureType),
25 fmax(params.fmax), 26 hopSize(params.hopSize),
26 nbins(params.nbins), 27 windowSize(params.windowSize),
27 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c 28 fmin(params.fmin),
28 nHMMStates(params.nHMMStates), 29 fmax(params.fmax),
29 nclusters(params.nclusters), 30 nbins(params.nbins),
30 histogramLength(params.histogramLength), 31 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
31 neighbourhoodLimit(params.neighbourhoodLimit) 32 nHMMStates(params.nHMMStates),
33 nclusters(params.nclusters),
34 histogramLength(params.histogramLength),
35 neighbourhoodLimit(params.neighbourhoodLimit),
36 decimator(0)
32 { 37 {
33 } 38 }
34 39
35 void ClusterMeltSegmenter::initialise(int fs) 40 void ClusterMeltSegmenter::initialise(int fs)
36 { 41 {
37 samplerate = fs; 42 samplerate = fs;
38 if (featureType != FEATURE_TYPE_UNKNOWN) 43
39 { 44 if (featureType != FEATURE_TYPE_UNKNOWN)
40 //!!! ncoeff = static_cast<int>(ceil(nbins * (log(fmax / static_cast<double>(fmin))) / log(2.0))); 45 {
41 CQConfig config; 46 // always run internal processing at 11025 or thereabouts
42 config.FS = samplerate; 47 int internalRate = 11025;
43 config.min = fmin; 48 int decimationFactor = samplerate / internalRate;
44 config.max = fmax; 49 if (decimationFactor < 1) decimationFactor = 1;
45 config.BPO = nbins; 50
46 config.CQThresh = 0.0054; 51 // must be a power of two
47 constq = new ConstantQ(config); 52 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
48 //!!! constq = init_constQ(fmin, fmax, nbins, samplerate, ncoeff); 53
49 ncoeff = constq->getK(); 54 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
50 } 55 decimationFactor = Decimator::getHighestSupportedFactor();
56 }
57
58 if (decimationFactor > 1) {
59 decimator = new Decimator(getWindowsize(), decimationFactor);
60 }
61
62 CQConfig config;
63 config.FS = samplerate / decimationFactor;
64 config.min = fmin;
65 config.max = fmax;
66 config.BPO = nbins;
67 config.CQThresh = 0.0054;
68
69 constq = new ConstantQ(config);
70 constq->sparsekernel();
71
72 ncoeff = constq->getK();
73 }
51 } 74 }
52 75
53 ClusterMeltSegmenter::~ClusterMeltSegmenter() 76 ClusterMeltSegmenter::~ClusterMeltSegmenter()
54 { 77 {
55 delete window; 78 delete window;
56 delete constq; 79 delete constq;
57 //!!! if (constq) 80 delete decimator;
58 // close_constQ(constq);
59 } 81 }
60 82
61 int 83 int
62 ClusterMeltSegmenter::getWindowsize() 84 ClusterMeltSegmenter::getWindowsize()
63 { 85 {
64 if (featureType != FEATURE_TYPE_UNKNOWN) { 86 if (featureType != FEATURE_TYPE_UNKNOWN) {
65 std::cerr << "rate = " << samplerate << ", fft length = " << constq->getfftlength() << ", fmin = " << fmin << ", fmax = " << fmax << ", nbins = " << nbins << ", K = " << constq->getK() << ", Q = " << constq->getQ() << std::endl; 87
66 return constq->getfftlength(); 88 if (constq) {
67 } else { 89 /*
68 return static_cast<int>(windowSize * samplerate); 90 std::cerr << "ClusterMeltSegmenter::getWindowsize: "
69 } 91 << "rate = " << samplerate
92 << ", dec factor = " << (decimator ? decimator->getFactor() : 1)
93 << ", fft length = " << constq->getfftlength()
94 << ", fmin = " << fmin
95 << ", fmax = " << fmax
96 << ", nbins = " << nbins
97 << ", K = " << constq->getK()
98 << ", Q = " << constq->getQ()
99 << std::endl;
100 */
101 }
102 }
103
104 return static_cast<int>(windowSize * samplerate);
70 } 105 }
71 106
72 int 107 int
73 ClusterMeltSegmenter::getHopsize() 108 ClusterMeltSegmenter::getHopsize()
74 { 109 {
75 return static_cast<int>(hopSize * samplerate); 110 return static_cast<int>(hopSize * samplerate);
76 } 111 }
77 112
78 void ClusterMeltSegmenter::extractFeatures(double* samples, int nsamples) 113 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
79 { 114 {
80 // create a new window if needed 115 if (!constq) {
81 /*!!! 116 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: "
82 if (!window || nsamples != windowLength) 117 << "Cannot run unknown feature type (or initialise not called)"
83 { 118 << std::endl;
84 if (window) 119 return;
85 delete [] window; 120 }
86 // Window<double>(HammingWindow, nsamples).cut 121
87 //!!! window = hamming_p(nsamples); 122 if (nsamples < getWindowsize()) {
88 windowLength = nsamples; 123 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
89 } 124 return;
90 */ 125 }
91 if (!window || window->getSize() != nsamples) { 126
92 delete window; 127 int fftsize = constq->getfftlength();
93 window = new Window<double>(HammingWindow, nsamples); 128
94 } 129 if (!window || window->getSize() != fftsize) {
95 130 delete window;
96 // copy the samples before windowing in case we need them for something else 131 window = new Window<double>(HammingWindow, fftsize);
97 double* frame = new double[nsamples]; 132 }
98 // for (int i = 0; i < nsamples; i++) 133
99 // frame[i] = samples[i] * window[i]; 134 vector<double> cq(ncoeff);
100 window->cut(frame); 135
101 136 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
102 std::cerr << "nsamples = " << nsamples << std::endl; 137
103 138 const double *psource = samples;
104 double *real = new double[nsamples]; 139 int pcount = nsamples;
105 double *imag = new double[nsamples]; 140
106 141 if (decimator) {
107 FFT::process(nsamples, false, frame, 0, real, imag); 142 pcount = nsamples / decimator->getFactor();
108 143 double *decout = new double[pcount];
109 double *cqre = new double[ncoeff]; 144 decimator->process(samples, decout);
110 double *cqim = new double[ncoeff]; 145 psource = decout;
111 146 }
112 constq->process(real, imag, cqre, cqim); 147
113 148 int origin = 0;
114 // extract const-Q 149
115 //!!! do_constQ(constq, frame, nsamples); 150 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
116 // int ncq = constq->ncoeff; 151
117 152 int frames = 0;
118 delete [] frame; 153
119 delete [] real; 154 double *frame = new double[fftsize];
120 delete [] imag; 155 double *real = new double[fftsize];
121 156 double *imag = new double[fftsize];
122 //!!! if (ncq == ncoeff) // else feature extraction failed 157 double *cqre = new double[ncoeff];
123 // { 158 double *cqim = new double[ncoeff];
124 // vector<double> cq(ncq); 159
125 // for (int i = 0; i < ncq; i++) 160 while (origin <= pcount) {
126 // cq[i] = constq->absconstQtransform[i]; 161
127 vector<double> cq(ncoeff); 162 // always need at least one fft window per block, but after
128 for (int i = 0; i < ncoeff; ++i) { 163 // that we want to avoid having any incomplete ones
129 cq[i] = sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); 164 if (origin > 0 && origin + fftsize >= pcount) break;
130 } 165
131 features.push_back(cq); 166 for (int i = 0; i < fftsize; ++i) {
132 // } 167 if (origin + i < pcount) {
133 168 frame[i] = psource[origin + i];
134 delete[] cqre; 169 } else {
135 delete[] cqim; 170 frame[i] = 0.0;
171 }
172 }
173
174 for (int i = 0; i < fftsize/2; ++i) {
175 double value = frame[i];
176 frame[i] = frame[i + fftsize/2];
177 frame[i + fftsize/2] = value;
178 }
179
180 window->cut(frame);
181
182 FFT::process(fftsize, false, frame, 0, real, imag);
183
184 constq->process(real, imag, cqre, cqim);
185
186 for (int i = 0; i < ncoeff; ++i) {
187 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
188 }
189 ++frames;
190
191 origin += fftsize/2;
192 }
193
194 delete [] cqre;
195 delete [] cqim;
196 delete [] real;
197 delete [] imag;
198 delete [] frame;
199
200 for (int i = 0; i < ncoeff; ++i) {
201 // std::cerr << cq[i] << " ";
202 cq[i] /= frames;
203 }
204 // std::cerr << std::endl;
205
206 if (decimator) delete[] psource;
207
208 features.push_back(cq);
136 } 209 }
137 210
138 void ClusterMeltSegmenter::segment(int m) 211 void ClusterMeltSegmenter::segment(int m)
139 { 212 {
140 nclusters = m; 213 nclusters = m;
141 segment(); 214 segment();
142 } 215 }
143 216
144 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f) 217 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
145 { 218 {
146 features = f; 219 features = f;
147 featureType = FEATURE_TYPE_UNKNOWN; 220 featureType = FEATURE_TYPE_UNKNOWN;
148 } 221 }
149 222
150 void ClusterMeltSegmenter::segment() 223 void ClusterMeltSegmenter::segment()
151 { 224 {
152 if (constq) 225 if (constq)
153 { 226 {
154 //!!! close_constQ(constq); // finished extracting features 227 delete constq;
155 delete constq; 228 constq = 0;
156 constq = NULL; 229 delete decimator;
157 } 230 decimator = 0;
158 231 }
159 // for now copy the features to a native array and use the existing C segmenter... 232
160 double** arrFeatures = new double*[features.size()]; 233 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
161 for (int i = 0; i < features.size(); i++) 234 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
162 { 235
163 if (featureType == FEATURE_TYPE_UNKNOWN) 236 // copy the features to a native array and use the existing C segmenter...
164 arrFeatures[i] = new double[features[0].size()]; 237 double** arrFeatures = new double*[features.size()];
165 else 238 for (int i = 0; i < features.size(); i++)
166 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope 239 {
167 for (int j = 0; j < ncoeff; j++) 240 if (featureType == FEATURE_TYPE_UNKNOWN) {
168 arrFeatures[i][j] = features[i][j]; 241 arrFeatures[i] = new double[features[0].size()];
169 } 242 for (int j = 0; j < features[0].size(); j++)
170 243 arrFeatures[i][j] = features[i][j];
171 q = new int[features.size()]; 244 } else {
172 245 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
173 if (featureType == FEATURE_TYPE_UNKNOWN) 246 for (int j = 0; j < ncoeff; j++)
174 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, 247 arrFeatures[i][j] = features[i][j];
175 nclusters, neighbourhoodLimit); 248 }
176 else 249 }
177 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, 250
178 nHMMStates, histogramLength, nclusters, neighbourhoodLimit); 251 q = new int[features.size()];
179 252
180 // convert the cluster assignment sequence to a segmentation 253 if (featureType == FEATURE_TYPE_UNKNOWN)
181 makeSegmentation(q, features.size()); 254 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
182 255 nclusters, neighbourhoodLimit);
183 // de-allocate arrays 256 else
184 delete [] q; 257 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
185 for (int i = 0; i < features.size(); i++) 258 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
186 delete [] arrFeatures[i]; 259
187 delete [] arrFeatures; 260 // convert the cluster assignment sequence to a segmentation
188 261 makeSegmentation(q, features.size());
189 // clear the features 262
190 clear(); 263 // de-allocate arrays
264 delete [] q;
265 for (int i = 0; i < features.size(); i++)
266 delete [] arrFeatures[i];
267 delete [] arrFeatures;
268
269 // clear the features
270 clear();
191 } 271 }
192 272
193 void ClusterMeltSegmenter::makeSegmentation(int* q, int len) 273 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
194 { 274 {
195 segmentation.segments.clear(); 275 segmentation.segments.clear();
196 segmentation.nsegtypes = nclusters; 276 segmentation.nsegtypes = nclusters;
197 segmentation.samplerate = samplerate; 277 segmentation.samplerate = samplerate;
198 278
199 Segment segment; 279 Segment segment;
200 segment.start = 0; 280 segment.start = 0;
201 segment.type = q[0]; 281 segment.type = q[0];
202 282
203 for (int i = 1; i < len; i++) 283 for (int i = 1; i < len; i++)
204 { 284 {
205 if (q[i] != q[i-1]) 285 if (q[i] != q[i-1])
206 { 286 {
207 segment.end = i * getHopsize(); 287 segment.end = i * getHopsize();
208 segmentation.segments.push_back(segment); 288 segmentation.segments.push_back(segment);
209 segment.type = q[i]; 289 segment.type = q[i];
210 segment.start = segment.end; 290 segment.start = segment.end;
211 } 291 }
212 } 292 }
213 segment.end = len * getHopsize(); 293 segment.end = len * getHopsize();
214 segmentation.segments.push_back(segment); 294 segmentation.segments.push_back(segment);
215 } 295 }
216 296
217 /*
218 void ClusterMeltSegmenter::mpeg7ConstQ()
219 {
220 // convert to dB scale
221 for (int i = 0; i < features.size(); i++)
222 for (int j = 0; j < ncoeff; j++)
223 features[i][j] = 10.0 * log10(features[i][j] + DBL_EPSILON);
224
225 // normalise features and add the norm at the end as an extra feature dimension
226 double maxnorm = 0; // track the max of the norms
227 for (int i = 0; i < features.size(); i++)
228 {
229 double norm = 0;
230 for (int j = 0; j < ncoeff; j++)
231 norm += features[i][j] * features[i][j];
232 norm = sqrt(norm);
233 for (int j = 0; j < ncoeff; j++)
234 features[i][j] /= norm;
235 features[i].push_back(norm);
236 if (norm > maxnorm)
237 maxnorm = norm;
238 }
239
240 // normalise the norms
241 for (int i = 0; i < features.size(); i++)
242 features[i][ncoeff] /= maxnorm;
243 }
244 */