cannam@24
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@24
|
2
|
cannam@18
|
3 /*
|
cannam@24
|
4 * ClusterMeltSegmenter.cpp
|
cannam@18
|
5 *
|
cannam@24
|
6 * Created by Mark Levy on 23/03/2006.
|
cannam@24
|
7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
|
cannam@24
|
8 * All rights reserved.
|
cannam@18
|
9 */
|
cannam@18
|
10
|
cannam@18
|
11 #include <cfloat>
|
cannam@18
|
12 #include <cmath>
|
cannam@18
|
13
|
cannam@18
|
14 #include "ClusterMeltSegmenter.h"
|
cannam@18
|
15 #include "cluster_segmenter.h"
|
cannam@18
|
16 #include "segment.h"
|
cannam@18
|
17
|
cannam@20
|
18 #include "dsp/transforms/FFT.h"
|
cannam@24
|
19 #include "dsp/chromagram/ConstantQ.h"
|
cannam@24
|
20 #include "dsp/rateconversion/Decimator.h"
|
cannam@20
|
21
|
cannam@24
|
22 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
|
cannam@24
|
23 window(NULL),
|
cannam@24
|
24 constq(NULL),
|
cannam@24
|
25 featureType(params.featureType),
|
cannam@24
|
26 hopSize(params.hopSize),
|
cannam@24
|
27 windowSize(params.windowSize),
|
cannam@24
|
28 fmin(params.fmin),
|
cannam@24
|
29 fmax(params.fmax),
|
cannam@24
|
30 nbins(params.nbins),
|
cannam@24
|
31 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
|
cannam@24
|
32 nHMMStates(params.nHMMStates),
|
cannam@24
|
33 nclusters(params.nclusters),
|
cannam@24
|
34 histogramLength(params.histogramLength),
|
cannam@24
|
35 neighbourhoodLimit(params.neighbourhoodLimit),
|
cannam@24
|
36 decimator(0)
|
cannam@18
|
37 {
|
cannam@18
|
38 }
|
cannam@18
|
39
|
cannam@18
|
40 void ClusterMeltSegmenter::initialise(int fs)
|
cannam@18
|
41 {
|
cannam@24
|
42 samplerate = fs;
|
cannam@24
|
43
|
cannam@24
|
44 if (featureType != FEATURE_TYPE_UNKNOWN)
|
cannam@24
|
45 {
|
cannam@24
|
46 // always run internal processing at 11025 or thereabouts
|
cannam@24
|
47 int internalRate = 11025;
|
cannam@24
|
48 int decimationFactor = samplerate / internalRate;
|
cannam@24
|
49 if (decimationFactor < 1) decimationFactor = 1;
|
cannam@24
|
50
|
cannam@24
|
51 // must be a power of two
|
cannam@24
|
52 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
|
cannam@24
|
53
|
cannam@24
|
54 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
|
cannam@24
|
55 decimationFactor = Decimator::getHighestSupportedFactor();
|
cannam@24
|
56 }
|
cannam@24
|
57
|
cannam@24
|
58 if (decimationFactor > 1) {
|
cannam@24
|
59 decimator = new Decimator(getWindowsize(), decimationFactor);
|
cannam@24
|
60 }
|
cannam@24
|
61
|
cannam@24
|
62 CQConfig config;
|
cannam@24
|
63 config.FS = samplerate / decimationFactor;
|
cannam@24
|
64 config.min = fmin;
|
cannam@24
|
65 config.max = fmax;
|
cannam@24
|
66 config.BPO = nbins;
|
cannam@24
|
67 config.CQThresh = 0.0054;
|
cannam@24
|
68
|
cannam@24
|
69 constq = new ConstantQ(config);
|
cannam@24
|
70 constq->sparsekernel();
|
cannam@24
|
71
|
cannam@24
|
72 ncoeff = constq->getK();
|
cannam@24
|
73 }
|
cannam@18
|
74 }
|
cannam@18
|
75
|
cannam@18
|
76 ClusterMeltSegmenter::~ClusterMeltSegmenter()
|
cannam@18
|
77 {
|
cannam@24
|
78 delete window;
|
cannam@24
|
79 delete constq;
|
cannam@24
|
80 delete decimator;
|
cannam@20
|
81 }
|
cannam@20
|
82
|
cannam@20
|
83 int
|
cannam@20
|
84 ClusterMeltSegmenter::getWindowsize()
|
cannam@20
|
85 {
|
cannam@24
|
86 if (featureType != FEATURE_TYPE_UNKNOWN) {
|
cannam@24
|
87
|
cannam@24
|
88 if (constq) {
|
cannam@24
|
89 /*
|
cannam@24
|
90 std::cerr << "ClusterMeltSegmenter::getWindowsize: "
|
cannam@24
|
91 << "rate = " << samplerate
|
cannam@24
|
92 << ", dec factor = " << (decimator ? decimator->getFactor() : 1)
|
cannam@24
|
93 << ", fft length = " << constq->getfftlength()
|
cannam@24
|
94 << ", fmin = " << fmin
|
cannam@24
|
95 << ", fmax = " << fmax
|
cannam@24
|
96 << ", nbins = " << nbins
|
cannam@24
|
97 << ", K = " << constq->getK()
|
cannam@24
|
98 << ", Q = " << constq->getQ()
|
cannam@24
|
99 << std::endl;
|
cannam@24
|
100 */
|
cannam@24
|
101 }
|
cannam@24
|
102 }
|
cannam@24
|
103
|
cannam@24
|
104 return static_cast<int>(windowSize * samplerate);
|
cannam@20
|
105 }
|
cannam@20
|
106
|
cannam@20
|
107 int
|
cannam@20
|
108 ClusterMeltSegmenter::getHopsize()
|
cannam@20
|
109 {
|
cannam@24
|
110 return static_cast<int>(hopSize * samplerate);
|
cannam@18
|
111 }
|
cannam@18
|
112
|
cannam@24
|
113 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
|
cannam@18
|
114 {
|
cannam@24
|
115 if (!constq) {
|
cannam@24
|
116 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: "
|
cannam@24
|
117 << "Cannot run unknown feature type (or initialise not called)"
|
cannam@24
|
118 << std::endl;
|
cannam@24
|
119 return;
|
cannam@24
|
120 }
|
cannam@20
|
121
|
cannam@24
|
122 if (nsamples < getWindowsize()) {
|
cannam@24
|
123 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
|
cannam@24
|
124 return;
|
cannam@24
|
125 }
|
cannam@24
|
126
|
cannam@24
|
127 int fftsize = constq->getfftlength();
|
cannam@24
|
128
|
cannam@24
|
129 if (!window || window->getSize() != fftsize) {
|
cannam@24
|
130 delete window;
|
cannam@24
|
131 window = new Window<double>(HammingWindow, fftsize);
|
cannam@24
|
132 }
|
cannam@24
|
133
|
cannam@24
|
134 vector<double> cq(ncoeff);
|
cannam@24
|
135
|
cannam@24
|
136 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
|
cannam@24
|
137
|
cannam@24
|
138 const double *psource = samples;
|
cannam@24
|
139 int pcount = nsamples;
|
cannam@24
|
140
|
cannam@24
|
141 if (decimator) {
|
cannam@24
|
142 pcount = nsamples / decimator->getFactor();
|
cannam@24
|
143 double *decout = new double[pcount];
|
cannam@24
|
144 decimator->process(samples, decout);
|
cannam@24
|
145 psource = decout;
|
cannam@24
|
146 }
|
cannam@24
|
147
|
cannam@24
|
148 int origin = 0;
|
cannam@24
|
149
|
cannam@24
|
150 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
|
cannam@24
|
151
|
cannam@24
|
152 int frames = 0;
|
cannam@24
|
153
|
cannam@24
|
154 double *frame = new double[fftsize];
|
cannam@24
|
155 double *real = new double[fftsize];
|
cannam@24
|
156 double *imag = new double[fftsize];
|
cannam@24
|
157 double *cqre = new double[ncoeff];
|
cannam@24
|
158 double *cqim = new double[ncoeff];
|
cannam@24
|
159
|
cannam@24
|
160 while (origin <= pcount) {
|
cannam@24
|
161
|
cannam@24
|
162 // always need at least one fft window per block, but after
|
cannam@24
|
163 // that we want to avoid having any incomplete ones
|
cannam@24
|
164 if (origin > 0 && origin + fftsize >= pcount) break;
|
cannam@24
|
165
|
cannam@24
|
166 for (int i = 0; i < fftsize; ++i) {
|
cannam@24
|
167 if (origin + i < pcount) {
|
cannam@24
|
168 frame[i] = psource[origin + i];
|
cannam@24
|
169 } else {
|
cannam@24
|
170 frame[i] = 0.0;
|
cannam@24
|
171 }
|
cannam@24
|
172 }
|
cannam@24
|
173
|
cannam@24
|
174 for (int i = 0; i < fftsize/2; ++i) {
|
cannam@24
|
175 double value = frame[i];
|
cannam@24
|
176 frame[i] = frame[i + fftsize/2];
|
cannam@24
|
177 frame[i + fftsize/2] = value;
|
cannam@24
|
178 }
|
cannam@24
|
179
|
cannam@24
|
180 window->cut(frame);
|
cannam@24
|
181
|
cannam@24
|
182 FFT::process(fftsize, false, frame, 0, real, imag);
|
cannam@24
|
183
|
cannam@24
|
184 constq->process(real, imag, cqre, cqim);
|
cannam@18
|
185
|
cannam@24
|
186 for (int i = 0; i < ncoeff; ++i) {
|
cannam@24
|
187 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
|
cannam@24
|
188 }
|
cannam@24
|
189 ++frames;
|
cannam@20
|
190
|
cannam@24
|
191 origin += fftsize/2;
|
cannam@24
|
192 }
|
cannam@20
|
193
|
cannam@24
|
194 delete [] cqre;
|
cannam@24
|
195 delete [] cqim;
|
cannam@24
|
196 delete [] real;
|
cannam@24
|
197 delete [] imag;
|
cannam@24
|
198 delete [] frame;
|
cannam@20
|
199
|
cannam@24
|
200 for (int i = 0; i < ncoeff; ++i) {
|
cannam@24
|
201 // std::cerr << cq[i] << " ";
|
cannam@24
|
202 cq[i] /= frames;
|
cannam@24
|
203 }
|
cannam@24
|
204 // std::cerr << std::endl;
|
cannam@20
|
205
|
cannam@24
|
206 if (decimator) delete[] psource;
|
cannam@20
|
207
|
cannam@24
|
208 features.push_back(cq);
|
cannam@18
|
209 }
|
cannam@18
|
210
|
cannam@18
|
211 void ClusterMeltSegmenter::segment(int m)
|
cannam@18
|
212 {
|
cannam@24
|
213 nclusters = m;
|
cannam@24
|
214 segment();
|
cannam@18
|
215 }
|
cannam@18
|
216
|
cannam@18
|
217 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
|
cannam@18
|
218 {
|
cannam@24
|
219 features = f;
|
cannam@24
|
220 featureType = FEATURE_TYPE_UNKNOWN;
|
cannam@18
|
221 }
|
cannam@18
|
222
|
cannam@18
|
223 void ClusterMeltSegmenter::segment()
|
cannam@18
|
224 {
|
cannam@24
|
225 if (constq)
|
cannam@24
|
226 {
|
cannam@24
|
227 delete constq;
|
cannam@24
|
228 constq = 0;
|
cannam@24
|
229 delete decimator;
|
cannam@24
|
230 decimator = 0;
|
cannam@24
|
231 }
|
cannam@18
|
232
|
cannam@24
|
233 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
|
cannam@24
|
234 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
|
cannam@24
|
235
|
cannam@24
|
236 // copy the features to a native array and use the existing C segmenter...
|
cannam@24
|
237 double** arrFeatures = new double*[features.size()];
|
cannam@24
|
238 for (int i = 0; i < features.size(); i++)
|
cannam@24
|
239 {
|
cannam@24
|
240 if (featureType == FEATURE_TYPE_UNKNOWN) {
|
cannam@24
|
241 arrFeatures[i] = new double[features[0].size()];
|
cannam@24
|
242 for (int j = 0; j < features[0].size(); j++)
|
cannam@24
|
243 arrFeatures[i][j] = features[i][j];
|
cannam@24
|
244 } else {
|
cannam@24
|
245 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
|
cannam@24
|
246 for (int j = 0; j < ncoeff; j++)
|
cannam@24
|
247 arrFeatures[i][j] = features[i][j];
|
cannam@24
|
248 }
|
cannam@24
|
249 }
|
cannam@18
|
250
|
cannam@24
|
251 q = new int[features.size()];
|
cannam@18
|
252
|
cannam@24
|
253 if (featureType == FEATURE_TYPE_UNKNOWN)
|
cannam@24
|
254 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
|
cannam@24
|
255 nclusters, neighbourhoodLimit);
|
cannam@24
|
256 else
|
cannam@24
|
257 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
|
cannam@24
|
258 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
|
cannam@18
|
259
|
cannam@24
|
260 // convert the cluster assignment sequence to a segmentation
|
cannam@24
|
261 makeSegmentation(q, features.size());
|
cannam@18
|
262
|
cannam@24
|
263 // de-allocate arrays
|
cannam@24
|
264 delete [] q;
|
cannam@24
|
265 for (int i = 0; i < features.size(); i++)
|
cannam@24
|
266 delete [] arrFeatures[i];
|
cannam@24
|
267 delete [] arrFeatures;
|
cannam@18
|
268
|
cannam@24
|
269 // clear the features
|
cannam@24
|
270 clear();
|
cannam@18
|
271 }
|
cannam@18
|
272
|
cannam@18
|
273 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
|
cannam@18
|
274 {
|
cannam@24
|
275 segmentation.segments.clear();
|
cannam@24
|
276 segmentation.nsegtypes = nclusters;
|
cannam@24
|
277 segmentation.samplerate = samplerate;
|
cannam@18
|
278
|
cannam@24
|
279 Segment segment;
|
cannam@24
|
280 segment.start = 0;
|
cannam@24
|
281 segment.type = q[0];
|
cannam@18
|
282
|
cannam@24
|
283 for (int i = 1; i < len; i++)
|
cannam@24
|
284 {
|
cannam@24
|
285 if (q[i] != q[i-1])
|
cannam@24
|
286 {
|
cannam@24
|
287 segment.end = i * getHopsize();
|
cannam@24
|
288 segmentation.segments.push_back(segment);
|
cannam@24
|
289 segment.type = q[i];
|
cannam@24
|
290 segment.start = segment.end;
|
cannam@24
|
291 }
|
cannam@24
|
292 }
|
cannam@24
|
293 segment.end = len * getHopsize();
|
cannam@24
|
294 segmentation.segments.push_back(segment);
|
cannam@18
|
295 }
|
cannam@18
|
296
|