c@243
|
1 /*
|
c@243
|
2 * ClusterMeltSegmenter.cpp
|
c@243
|
3 * soundbite
|
c@243
|
4 *
|
c@243
|
5 * Created by Mark Levy on 23/03/2006.
|
c@243
|
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
|
c@243
|
7 *
|
c@243
|
8 */
|
c@243
|
9
|
c@243
|
10 #include <cfloat>
|
c@243
|
11 #include <cmath>
|
c@243
|
12
|
c@243
|
13 #include "ClusterMeltSegmenter.h"
|
c@243
|
14 #include "cluster_segmenter.h"
|
c@243
|
15 #include "segment.h"
|
c@243
|
16
|
c@245
|
17 #include "dsp/transforms/FFT.h"
|
c@245
|
18
|
c@243
|
19 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : window(NULL),
|
c@243
|
20 constq(NULL),
|
c@243
|
21 featureType(params.featureType),
|
c@245
|
22 hopSize(params.hopSize),
|
c@243
|
23 windowSize(params.windowSize),
|
c@243
|
24 fmin(params.fmin),
|
c@243
|
25 fmax(params.fmax),
|
c@243
|
26 nbins(params.nbins),
|
c@243
|
27 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
|
c@243
|
28 nHMMStates(params.nHMMStates),
|
c@243
|
29 nclusters(params.nclusters),
|
c@243
|
30 histogramLength(params.histogramLength),
|
c@243
|
31 neighbourhoodLimit(params.neighbourhoodLimit)
|
c@243
|
32 {
|
c@243
|
33 }
|
c@243
|
34
|
c@243
|
35 void ClusterMeltSegmenter::initialise(int fs)
|
c@243
|
36 {
|
c@243
|
37 samplerate = fs;
|
c@243
|
38 if (featureType != FEATURE_TYPE_UNKNOWN)
|
c@243
|
39 {
|
c@245
|
40 //!!! ncoeff = static_cast<int>(ceil(nbins * (log(fmax / static_cast<double>(fmin))) / log(2.0)));
|
c@245
|
41 CQConfig config;
|
c@245
|
42 config.FS = samplerate;
|
c@245
|
43 config.min = fmin;
|
c@245
|
44 config.max = fmax;
|
c@245
|
45 config.BPO = nbins;
|
c@245
|
46 config.CQThresh = 0.0054;
|
c@245
|
47 constq = new ConstantQ(config);
|
c@245
|
48 //!!! constq = init_constQ(fmin, fmax, nbins, samplerate, ncoeff);
|
c@245
|
49 ncoeff = constq->getK();
|
c@243
|
50 }
|
c@243
|
51 }
|
c@243
|
52
|
c@243
|
53 ClusterMeltSegmenter::~ClusterMeltSegmenter()
|
c@243
|
54 {
|
c@245
|
55 delete window;
|
c@245
|
56 delete constq;
|
c@245
|
57 //!!! if (constq)
|
c@245
|
58 // close_constQ(constq);
|
c@245
|
59 }
|
c@245
|
60
|
c@245
|
61 int
|
c@245
|
62 ClusterMeltSegmenter::getWindowsize()
|
c@245
|
63 {
|
c@245
|
64 if (featureType != FEATURE_TYPE_UNKNOWN) {
|
c@245
|
65 std::cerr << "rate = " << samplerate << ", fft length = " << constq->getfftlength() << ", fmin = " << fmin << ", fmax = " << fmax << ", nbins = " << nbins << ", K = " << constq->getK() << ", Q = " << constq->getQ() << std::endl;
|
c@245
|
66 return constq->getfftlength();
|
c@245
|
67 } else {
|
c@245
|
68 return static_cast<int>(windowSize * samplerate);
|
c@245
|
69 }
|
c@245
|
70 }
|
c@245
|
71
|
c@245
|
72 int
|
c@245
|
73 ClusterMeltSegmenter::getHopsize()
|
c@245
|
74 {
|
c@245
|
75 return static_cast<int>(hopSize * samplerate);
|
c@243
|
76 }
|
c@243
|
77
|
c@243
|
78 void ClusterMeltSegmenter::extractFeatures(double* samples, int nsamples)
|
c@243
|
79 {
|
c@243
|
80 // create a new window if needed
|
c@245
|
81 /*!!!
|
c@243
|
82 if (!window || nsamples != windowLength)
|
c@243
|
83 {
|
c@243
|
84 if (window)
|
c@243
|
85 delete [] window;
|
c@245
|
86 // Window<double>(HammingWindow, nsamples).cut
|
c@245
|
87 //!!! window = hamming_p(nsamples);
|
c@243
|
88 windowLength = nsamples;
|
c@243
|
89 }
|
c@245
|
90 */
|
c@245
|
91 if (!window || window->getSize() != nsamples) {
|
c@245
|
92 delete window;
|
c@245
|
93 window = new Window<double>(HammingWindow, nsamples);
|
c@245
|
94 }
|
c@245
|
95
|
c@243
|
96 // copy the samples before windowing in case we need them for something else
|
c@243
|
97 double* frame = new double[nsamples];
|
c@245
|
98 // for (int i = 0; i < nsamples; i++)
|
c@245
|
99 // frame[i] = samples[i] * window[i];
|
c@245
|
100 window->cut(frame);
|
c@243
|
101
|
c@245
|
102 std::cerr << "nsamples = " << nsamples << std::endl;
|
c@245
|
103
|
c@245
|
104 double *real = new double[nsamples];
|
c@245
|
105 double *imag = new double[nsamples];
|
c@245
|
106
|
c@245
|
107 FFT::process(nsamples, false, frame, 0, real, imag);
|
c@245
|
108
|
c@245
|
109 double *cqre = new double[ncoeff];
|
c@245
|
110 double *cqim = new double[ncoeff];
|
c@245
|
111
|
c@245
|
112 constq->process(real, imag, cqre, cqim);
|
c@245
|
113
|
c@243
|
114 // extract const-Q
|
c@245
|
115 //!!! do_constQ(constq, frame, nsamples);
|
c@245
|
116 // int ncq = constq->ncoeff;
|
c@245
|
117
|
c@245
|
118 delete [] frame;
|
c@245
|
119 delete [] real;
|
c@245
|
120 delete [] imag;
|
c@243
|
121
|
c@245
|
122 //!!! if (ncq == ncoeff) // else feature extraction failed
|
c@245
|
123 // {
|
c@245
|
124 // vector<double> cq(ncq);
|
c@245
|
125 // for (int i = 0; i < ncq; i++)
|
c@245
|
126 // cq[i] = constq->absconstQtransform[i];
|
c@245
|
127 vector<double> cq(ncoeff);
|
c@245
|
128 for (int i = 0; i < ncoeff; ++i) {
|
c@245
|
129 cq[i] = sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
|
c@245
|
130 }
|
c@243
|
131 features.push_back(cq);
|
c@245
|
132 // }
|
c@245
|
133
|
c@245
|
134 delete[] cqre;
|
c@245
|
135 delete[] cqim;
|
c@243
|
136 }
|
c@243
|
137
|
c@243
|
138 void ClusterMeltSegmenter::segment(int m)
|
c@243
|
139 {
|
c@243
|
140 nclusters = m;
|
c@243
|
141 segment();
|
c@243
|
142 }
|
c@243
|
143
|
c@243
|
144 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
|
c@243
|
145 {
|
c@243
|
146 features = f;
|
c@243
|
147 featureType = FEATURE_TYPE_UNKNOWN;
|
c@243
|
148 }
|
c@243
|
149
|
c@243
|
150 void ClusterMeltSegmenter::segment()
|
c@243
|
151 {
|
c@243
|
152 if (constq)
|
c@243
|
153 {
|
c@245
|
154 //!!! close_constQ(constq); // finished extracting features
|
c@245
|
155 delete constq;
|
c@243
|
156 constq = NULL;
|
c@243
|
157 }
|
c@243
|
158
|
c@243
|
159 // for now copy the features to a native array and use the existing C segmenter...
|
c@243
|
160 double** arrFeatures = new double*[features.size()];
|
c@243
|
161 for (int i = 0; i < features.size(); i++)
|
c@243
|
162 {
|
c@243
|
163 if (featureType == FEATURE_TYPE_UNKNOWN)
|
c@243
|
164 arrFeatures[i] = new double[features[0].size()];
|
c@243
|
165 else
|
c@243
|
166 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
|
c@243
|
167 for (int j = 0; j < ncoeff; j++)
|
c@243
|
168 arrFeatures[i][j] = features[i][j];
|
c@243
|
169 }
|
c@243
|
170
|
c@243
|
171 q = new int[features.size()];
|
c@243
|
172
|
c@243
|
173 if (featureType == FEATURE_TYPE_UNKNOWN)
|
c@243
|
174 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
|
c@243
|
175 nclusters, neighbourhoodLimit);
|
c@243
|
176 else
|
c@243
|
177 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
|
c@243
|
178 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
|
c@243
|
179
|
c@243
|
180 // convert the cluster assignment sequence to a segmentation
|
c@243
|
181 makeSegmentation(q, features.size());
|
c@243
|
182
|
c@243
|
183 // de-allocate arrays
|
c@243
|
184 delete [] q;
|
c@243
|
185 for (int i = 0; i < features.size(); i++)
|
c@243
|
186 delete [] arrFeatures[i];
|
c@243
|
187 delete [] arrFeatures;
|
c@243
|
188
|
c@243
|
189 // clear the features
|
c@243
|
190 clear();
|
c@243
|
191 }
|
c@243
|
192
|
c@243
|
193 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
|
c@243
|
194 {
|
c@243
|
195 segmentation.segments.clear();
|
c@243
|
196 segmentation.nsegtypes = nclusters;
|
c@243
|
197 segmentation.samplerate = samplerate;
|
c@243
|
198
|
c@243
|
199 Segment segment;
|
c@243
|
200 segment.start = 0;
|
c@243
|
201 segment.type = q[0];
|
c@243
|
202
|
c@243
|
203 for (int i = 1; i < len; i++)
|
c@243
|
204 {
|
c@243
|
205 if (q[i] != q[i-1])
|
c@243
|
206 {
|
c@243
|
207 segment.end = i * getHopsize();
|
c@243
|
208 segmentation.segments.push_back(segment);
|
c@243
|
209 segment.type = q[i];
|
c@243
|
210 segment.start = segment.end;
|
c@243
|
211 }
|
c@243
|
212 }
|
c@243
|
213 segment.end = len * getHopsize();
|
c@243
|
214 segmentation.segments.push_back(segment);
|
c@243
|
215 }
|
c@243
|
216
|
c@243
|
217 /*
|
c@243
|
218 void ClusterMeltSegmenter::mpeg7ConstQ()
|
c@243
|
219 {
|
c@243
|
220 // convert to dB scale
|
c@243
|
221 for (int i = 0; i < features.size(); i++)
|
c@243
|
222 for (int j = 0; j < ncoeff; j++)
|
c@243
|
223 features[i][j] = 10.0 * log10(features[i][j] + DBL_EPSILON);
|
c@243
|
224
|
c@243
|
225 // normalise features and add the norm at the end as an extra feature dimension
|
c@243
|
226 double maxnorm = 0; // track the max of the norms
|
c@243
|
227 for (int i = 0; i < features.size(); i++)
|
c@243
|
228 {
|
c@243
|
229 double norm = 0;
|
c@243
|
230 for (int j = 0; j < ncoeff; j++)
|
c@243
|
231 norm += features[i][j] * features[i][j];
|
c@243
|
232 norm = sqrt(norm);
|
c@243
|
233 for (int j = 0; j < ncoeff; j++)
|
c@243
|
234 features[i][j] /= norm;
|
c@243
|
235 features[i].push_back(norm);
|
c@243
|
236 if (norm > maxnorm)
|
c@243
|
237 maxnorm = norm;
|
c@243
|
238 }
|
c@243
|
239
|
c@243
|
240 // normalise the norms
|
c@243
|
241 for (int i = 0; i < features.size(); i++)
|
c@243
|
242 features[i][ncoeff] /= maxnorm;
|
c@243
|
243 }
|
c@243
|
244 */
|