cannam@18
|
1 /*
|
cannam@18
|
2 * ClusterMeltSegmenter.cpp
|
cannam@18
|
3 * soundbite
|
cannam@18
|
4 *
|
cannam@18
|
5 * Created by Mark Levy on 23/03/2006.
|
cannam@18
|
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
|
cannam@18
|
7 *
|
cannam@18
|
8 */
|
cannam@18
|
9
|
cannam@18
|
10 #include <cfloat>
|
cannam@18
|
11 #include <cmath>
|
cannam@18
|
12
|
cannam@18
|
13 #include "ClusterMeltSegmenter.h"
|
cannam@18
|
14 #include "cluster_segmenter.h"
|
cannam@18
|
15 #include "segment.h"
|
cannam@18
|
16
|
cannam@20
|
17 #include "dsp/transforms/FFT.h"
|
cannam@20
|
18
|
cannam@18
|
19 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : window(NULL),
|
cannam@18
|
20 constq(NULL),
|
cannam@18
|
21 featureType(params.featureType),
|
cannam@20
|
22 hopSize(params.hopSize),
|
cannam@18
|
23 windowSize(params.windowSize),
|
cannam@18
|
24 fmin(params.fmin),
|
cannam@18
|
25 fmax(params.fmax),
|
cannam@18
|
26 nbins(params.nbins),
|
cannam@18
|
27 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
|
cannam@18
|
28 nHMMStates(params.nHMMStates),
|
cannam@18
|
29 nclusters(params.nclusters),
|
cannam@18
|
30 histogramLength(params.histogramLength),
|
cannam@18
|
31 neighbourhoodLimit(params.neighbourhoodLimit)
|
cannam@18
|
32 {
|
cannam@18
|
33 }
|
cannam@18
|
34
|
cannam@18
|
35 void ClusterMeltSegmenter::initialise(int fs)
|
cannam@18
|
36 {
|
cannam@18
|
37 samplerate = fs;
|
cannam@18
|
38 if (featureType != FEATURE_TYPE_UNKNOWN)
|
cannam@18
|
39 {
|
cannam@20
|
40 //!!! ncoeff = static_cast<int>(ceil(nbins * (log(fmax / static_cast<double>(fmin))) / log(2.0)));
|
cannam@20
|
41 CQConfig config;
|
cannam@20
|
42 config.FS = samplerate;
|
cannam@20
|
43 config.min = fmin;
|
cannam@20
|
44 config.max = fmax;
|
cannam@20
|
45 config.BPO = nbins;
|
cannam@20
|
46 config.CQThresh = 0.0054;
|
cannam@20
|
47 constq = new ConstantQ(config);
|
cannam@20
|
48 //!!! constq = init_constQ(fmin, fmax, nbins, samplerate, ncoeff);
|
cannam@20
|
49 ncoeff = constq->getK();
|
cannam@18
|
50 }
|
cannam@18
|
51 }
|
cannam@18
|
52
|
cannam@18
|
53 ClusterMeltSegmenter::~ClusterMeltSegmenter()
|
cannam@18
|
54 {
|
cannam@20
|
55 delete window;
|
cannam@20
|
56 delete constq;
|
cannam@20
|
57 //!!! if (constq)
|
cannam@20
|
58 // close_constQ(constq);
|
cannam@20
|
59 }
|
cannam@20
|
60
|
cannam@20
|
61 int
|
cannam@20
|
62 ClusterMeltSegmenter::getWindowsize()
|
cannam@20
|
63 {
|
cannam@20
|
64 if (featureType != FEATURE_TYPE_UNKNOWN) {
|
cannam@20
|
65 std::cerr << "rate = " << samplerate << ", fft length = " << constq->getfftlength() << ", fmin = " << fmin << ", fmax = " << fmax << ", nbins = " << nbins << ", K = " << constq->getK() << ", Q = " << constq->getQ() << std::endl;
|
cannam@20
|
66 return constq->getfftlength();
|
cannam@20
|
67 } else {
|
cannam@20
|
68 return static_cast<int>(windowSize * samplerate);
|
cannam@20
|
69 }
|
cannam@20
|
70 }
|
cannam@20
|
71
|
cannam@20
|
72 int
|
cannam@20
|
73 ClusterMeltSegmenter::getHopsize()
|
cannam@20
|
74 {
|
cannam@20
|
75 return static_cast<int>(hopSize * samplerate);
|
cannam@18
|
76 }
|
cannam@18
|
77
|
cannam@18
|
78 void ClusterMeltSegmenter::extractFeatures(double* samples, int nsamples)
|
cannam@18
|
79 {
|
cannam@18
|
80 // create a new window if needed
|
cannam@20
|
81 /*!!!
|
cannam@18
|
82 if (!window || nsamples != windowLength)
|
cannam@18
|
83 {
|
cannam@18
|
84 if (window)
|
cannam@18
|
85 delete [] window;
|
cannam@20
|
86 // Window<double>(HammingWindow, nsamples).cut
|
cannam@20
|
87 //!!! window = hamming_p(nsamples);
|
cannam@18
|
88 windowLength = nsamples;
|
cannam@18
|
89 }
|
cannam@20
|
90 */
|
cannam@20
|
91 if (!window || window->getSize() != nsamples) {
|
cannam@20
|
92 delete window;
|
cannam@20
|
93 window = new Window<double>(HammingWindow, nsamples);
|
cannam@20
|
94 }
|
cannam@20
|
95
|
cannam@18
|
96 // copy the samples before windowing in case we need them for something else
|
cannam@18
|
97 double* frame = new double[nsamples];
|
cannam@20
|
98 // for (int i = 0; i < nsamples; i++)
|
cannam@20
|
99 // frame[i] = samples[i] * window[i];
|
cannam@20
|
100 window->cut(frame);
|
cannam@18
|
101
|
cannam@20
|
102 std::cerr << "nsamples = " << nsamples << std::endl;
|
cannam@20
|
103
|
cannam@20
|
104 double *real = new double[nsamples];
|
cannam@20
|
105 double *imag = new double[nsamples];
|
cannam@20
|
106
|
cannam@20
|
107 FFT::process(nsamples, false, frame, 0, real, imag);
|
cannam@20
|
108
|
cannam@20
|
109 double *cqre = new double[ncoeff];
|
cannam@20
|
110 double *cqim = new double[ncoeff];
|
cannam@20
|
111
|
cannam@20
|
112 constq->process(real, imag, cqre, cqim);
|
cannam@20
|
113
|
cannam@18
|
114 // extract const-Q
|
cannam@20
|
115 //!!! do_constQ(constq, frame, nsamples);
|
cannam@20
|
116 // int ncq = constq->ncoeff;
|
cannam@20
|
117
|
cannam@20
|
118 delete [] frame;
|
cannam@20
|
119 delete [] real;
|
cannam@20
|
120 delete [] imag;
|
cannam@18
|
121
|
cannam@20
|
122 //!!! if (ncq == ncoeff) // else feature extraction failed
|
cannam@20
|
123 // {
|
cannam@20
|
124 // vector<double> cq(ncq);
|
cannam@20
|
125 // for (int i = 0; i < ncq; i++)
|
cannam@20
|
126 // cq[i] = constq->absconstQtransform[i];
|
cannam@20
|
127 vector<double> cq(ncoeff);
|
cannam@20
|
128 for (int i = 0; i < ncoeff; ++i) {
|
cannam@20
|
129 cq[i] = sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
|
cannam@20
|
130 }
|
cannam@18
|
131 features.push_back(cq);
|
cannam@20
|
132 // }
|
cannam@20
|
133
|
cannam@20
|
134 delete[] cqre;
|
cannam@20
|
135 delete[] cqim;
|
cannam@18
|
136 }
|
cannam@18
|
137
|
cannam@18
|
138 void ClusterMeltSegmenter::segment(int m)
|
cannam@18
|
139 {
|
cannam@18
|
140 nclusters = m;
|
cannam@18
|
141 segment();
|
cannam@18
|
142 }
|
cannam@18
|
143
|
cannam@18
|
144 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
|
cannam@18
|
145 {
|
cannam@18
|
146 features = f;
|
cannam@18
|
147 featureType = FEATURE_TYPE_UNKNOWN;
|
cannam@18
|
148 }
|
cannam@18
|
149
|
cannam@18
|
150 void ClusterMeltSegmenter::segment()
|
cannam@18
|
151 {
|
cannam@18
|
152 if (constq)
|
cannam@18
|
153 {
|
cannam@20
|
154 //!!! close_constQ(constq); // finished extracting features
|
cannam@20
|
155 delete constq;
|
cannam@18
|
156 constq = NULL;
|
cannam@18
|
157 }
|
cannam@18
|
158
|
cannam@18
|
159 // for now copy the features to a native array and use the existing C segmenter...
|
cannam@18
|
160 double** arrFeatures = new double*[features.size()];
|
cannam@18
|
161 for (int i = 0; i < features.size(); i++)
|
cannam@18
|
162 {
|
cannam@18
|
163 if (featureType == FEATURE_TYPE_UNKNOWN)
|
cannam@18
|
164 arrFeatures[i] = new double[features[0].size()];
|
cannam@18
|
165 else
|
cannam@18
|
166 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
|
cannam@18
|
167 for (int j = 0; j < ncoeff; j++)
|
cannam@18
|
168 arrFeatures[i][j] = features[i][j];
|
cannam@18
|
169 }
|
cannam@18
|
170
|
cannam@18
|
171 q = new int[features.size()];
|
cannam@18
|
172
|
cannam@18
|
173 if (featureType == FEATURE_TYPE_UNKNOWN)
|
cannam@18
|
174 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
|
cannam@18
|
175 nclusters, neighbourhoodLimit);
|
cannam@18
|
176 else
|
cannam@18
|
177 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
|
cannam@18
|
178 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
|
cannam@18
|
179
|
cannam@18
|
180 // convert the cluster assignment sequence to a segmentation
|
cannam@18
|
181 makeSegmentation(q, features.size());
|
cannam@18
|
182
|
cannam@18
|
183 // de-allocate arrays
|
cannam@18
|
184 delete [] q;
|
cannam@18
|
185 for (int i = 0; i < features.size(); i++)
|
cannam@18
|
186 delete [] arrFeatures[i];
|
cannam@18
|
187 delete [] arrFeatures;
|
cannam@18
|
188
|
cannam@18
|
189 // clear the features
|
cannam@18
|
190 clear();
|
cannam@18
|
191 }
|
cannam@18
|
192
|
cannam@18
|
193 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
|
cannam@18
|
194 {
|
cannam@18
|
195 segmentation.segments.clear();
|
cannam@18
|
196 segmentation.nsegtypes = nclusters;
|
cannam@18
|
197 segmentation.samplerate = samplerate;
|
cannam@18
|
198
|
cannam@18
|
199 Segment segment;
|
cannam@18
|
200 segment.start = 0;
|
cannam@18
|
201 segment.type = q[0];
|
cannam@18
|
202
|
cannam@18
|
203 for (int i = 1; i < len; i++)
|
cannam@18
|
204 {
|
cannam@18
|
205 if (q[i] != q[i-1])
|
cannam@18
|
206 {
|
cannam@18
|
207 segment.end = i * getHopsize();
|
cannam@18
|
208 segmentation.segments.push_back(segment);
|
cannam@18
|
209 segment.type = q[i];
|
cannam@18
|
210 segment.start = segment.end;
|
cannam@18
|
211 }
|
cannam@18
|
212 }
|
cannam@18
|
213 segment.end = len * getHopsize();
|
cannam@18
|
214 segmentation.segments.push_back(segment);
|
cannam@18
|
215 }
|
cannam@18
|
216
|
cannam@18
|
217 /*
|
cannam@18
|
218 void ClusterMeltSegmenter::mpeg7ConstQ()
|
cannam@18
|
219 {
|
cannam@18
|
220 // convert to dB scale
|
cannam@18
|
221 for (int i = 0; i < features.size(); i++)
|
cannam@18
|
222 for (int j = 0; j < ncoeff; j++)
|
cannam@18
|
223 features[i][j] = 10.0 * log10(features[i][j] + DBL_EPSILON);
|
cannam@18
|
224
|
cannam@18
|
225 // normalise features and add the norm at the end as an extra feature dimension
|
cannam@18
|
226 double maxnorm = 0; // track the max of the norms
|
cannam@18
|
227 for (int i = 0; i < features.size(); i++)
|
cannam@18
|
228 {
|
cannam@18
|
229 double norm = 0;
|
cannam@18
|
230 for (int j = 0; j < ncoeff; j++)
|
cannam@18
|
231 norm += features[i][j] * features[i][j];
|
cannam@18
|
232 norm = sqrt(norm);
|
cannam@18
|
233 for (int j = 0; j < ncoeff; j++)
|
cannam@18
|
234 features[i][j] /= norm;
|
cannam@18
|
235 features[i].push_back(norm);
|
cannam@18
|
236 if (norm > maxnorm)
|
cannam@18
|
237 maxnorm = norm;
|
cannam@18
|
238 }
|
cannam@18
|
239
|
cannam@18
|
240 // normalise the norms
|
cannam@18
|
241 for (int i = 0; i < features.size(); i++)
|
cannam@18
|
242 features[i][ncoeff] /= maxnorm;
|
cannam@18
|
243 }
|
cannam@18
|
244 */
|