Mercurial > hg > qm-vamp-plugins
comparison plugins/SimilarityPlugin.cpp @ 41:b9fb6dee85f7
* Add similarity plugin
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Fri, 11 Jan 2008 18:18:45 +0000 |
parents | |
children | 0f85778f1b53 |
comparison
equal
deleted
inserted
replaced
40:77e394a5f3c9 | 41:b9fb6dee85f7 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 * SimilarityPlugin.cpp | |
5 * | |
6 * Copyright 2008 Centre for Digital Music, Queen Mary, University of London. | |
7 * All rights reserved. | |
8 */ | |
9 | |
10 #include <iostream> | |
11 #include <sstream> | |
12 | |
13 #include "SimilarityPlugin.h" | |
14 #include "dsp/mfcc/MFCC.h" | |
15 #include "dsp/rateconversion/Decimator.h" | |
16 | |
17 using std::string; | |
18 using std::vector; | |
19 using std::cerr; | |
20 using std::endl; | |
21 using std::ostringstream; | |
22 | |
23 SimilarityPlugin::SimilarityPlugin(float inputSampleRate) : | |
24 Plugin(inputSampleRate), | |
25 m_mfcc(0), | |
26 m_decimator(0), | |
27 m_K(20), | |
28 m_blockSize(0), | |
29 m_channels(0) | |
30 { | |
31 | |
32 } | |
33 | |
34 SimilarityPlugin::~SimilarityPlugin() | |
35 { | |
36 delete m_mfcc; | |
37 delete m_decimator; | |
38 } | |
39 | |
40 string | |
41 SimilarityPlugin::getIdentifier() const | |
42 { | |
43 return "qm-similarity"; | |
44 } | |
45 | |
46 string | |
47 SimilarityPlugin::getName() const | |
48 { | |
49 return "Similarity"; | |
50 } | |
51 | |
52 string | |
53 SimilarityPlugin::getDescription() const | |
54 { | |
55 return "Return a distance metric for overall timbral similarity between the input audio channels"; | |
56 } | |
57 | |
58 string | |
59 SimilarityPlugin::getMaker() const | |
60 { | |
61 return "Chris Cannam, Queen Mary, University of London"; | |
62 } | |
63 | |
64 int | |
65 SimilarityPlugin::getPluginVersion() const | |
66 { | |
67 return 1; | |
68 } | |
69 | |
70 string | |
71 SimilarityPlugin::getCopyright() const | |
72 { | |
73 return "Copyright (c) 2008 - All Rights Reserved"; | |
74 } | |
75 | |
76 size_t | |
77 SimilarityPlugin::getMinChannelCount() const | |
78 { | |
79 return 2; | |
80 } | |
81 | |
82 size_t | |
83 SimilarityPlugin::getMaxChannelCount() const | |
84 { | |
85 return 1024; | |
86 } | |
87 | |
88 bool | |
89 SimilarityPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) | |
90 { | |
91 if (channels < getMinChannelCount() || | |
92 channels > getMaxChannelCount()) return false; | |
93 | |
94 if (stepSize != getPreferredStepSize()) { | |
95 std::cerr << "SimilarityPlugin::initialise: supplied step size " | |
96 << stepSize << " differs from required step size " | |
97 << getPreferredStepSize() << std::endl; | |
98 return false; | |
99 } | |
100 | |
101 if (blockSize != getPreferredBlockSize()) { | |
102 std::cerr << "SimilarityPlugin::initialise: supplied block size " | |
103 << blockSize << " differs from required block size " | |
104 << getPreferredBlockSize() << std::endl; | |
105 return false; | |
106 } | |
107 | |
108 m_blockSize = blockSize; | |
109 m_channels = channels; | |
110 | |
111 int decimationFactor = getDecimationFactor(); | |
112 if (decimationFactor > 1) { | |
113 m_decimator = new Decimator(getPreferredBlockSize(), decimationFactor); | |
114 } | |
115 | |
116 MFCCConfig config; | |
117 config.FS = lrintf(m_inputSampleRate) / decimationFactor; | |
118 config.fftsize = 2048; | |
119 config.nceps = m_K - 1; | |
120 config.want_c0 = true; | |
121 m_mfcc = new MFCC(config); | |
122 | |
123 for (int i = 0; i < m_channels; ++i) { | |
124 m_mfeatures.push_back(MFCCFeatureVector()); | |
125 } | |
126 | |
127 return true; | |
128 } | |
129 | |
130 void | |
131 SimilarityPlugin::reset() | |
132 { | |
133 //!!! | |
134 } | |
135 | |
136 int | |
137 SimilarityPlugin::getDecimationFactor() const | |
138 { | |
139 int rate = lrintf(m_inputSampleRate); | |
140 int internalRate = 22050; | |
141 int decimationFactor = rate / internalRate; | |
142 if (decimationFactor < 1) decimationFactor = 1; | |
143 | |
144 // must be a power of two | |
145 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; | |
146 | |
147 return decimationFactor; | |
148 } | |
149 | |
150 size_t | |
151 SimilarityPlugin::getPreferredStepSize() const | |
152 { | |
153 return 1024 * getDecimationFactor(); | |
154 } | |
155 | |
156 size_t | |
157 SimilarityPlugin::getPreferredBlockSize() const | |
158 { | |
159 return 2048 * getDecimationFactor(); | |
160 } | |
161 | |
162 SimilarityPlugin::ParameterList SimilarityPlugin::getParameterDescriptors() const | |
163 { | |
164 ParameterList list; | |
165 return list; | |
166 } | |
167 | |
168 float | |
169 SimilarityPlugin::getParameter(std::string param) const | |
170 { | |
171 std::cerr << "WARNING: SimilarityPlugin::getParameter: unknown parameter \"" | |
172 << param << "\"" << std::endl; | |
173 return 0.0; | |
174 } | |
175 | |
176 void | |
177 SimilarityPlugin::setParameter(std::string param, float value) | |
178 { | |
179 std::cerr << "WARNING: SimilarityPlugin::setParameter: unknown parameter \"" | |
180 << param << "\"" << std::endl; | |
181 } | |
182 | |
183 SimilarityPlugin::OutputList | |
184 SimilarityPlugin::getOutputDescriptors() const | |
185 { | |
186 OutputList list; | |
187 | |
188 OutputDescriptor similarity; | |
189 similarity.identifier = "distance"; | |
190 similarity.name = "Distance"; | |
191 similarity.description = "Distance Metric for Timbral Similarity (smaller = more similar)"; | |
192 similarity.unit = ""; | |
193 similarity.hasFixedBinCount = true; | |
194 similarity.binCount = m_channels; | |
195 similarity.hasKnownExtents = false; | |
196 similarity.isQuantized = false; | |
197 similarity.sampleType = OutputDescriptor::FixedSampleRate; | |
198 similarity.sampleRate = 1; | |
199 | |
200 list.push_back(similarity); | |
201 | |
202 OutputDescriptor means; | |
203 means.identifier = "means"; | |
204 means.name = "MFCC Means"; | |
205 means.description = ""; | |
206 means.unit = ""; | |
207 means.hasFixedBinCount = true; | |
208 means.binCount = m_channels; | |
209 means.hasKnownExtents = false; | |
210 means.isQuantized = false; | |
211 means.sampleType = OutputDescriptor::VariableSampleRate; | |
212 means.sampleRate = m_inputSampleRate / getPreferredStepSize(); | |
213 | |
214 list.push_back(means); | |
215 | |
216 OutputDescriptor variances; | |
217 variances.identifier = "variances"; | |
218 variances.name = "MFCC Variances"; | |
219 variances.description = ""; | |
220 variances.unit = ""; | |
221 variances.hasFixedBinCount = true; | |
222 variances.binCount = m_channels; | |
223 variances.hasKnownExtents = false; | |
224 variances.isQuantized = false; | |
225 variances.sampleType = OutputDescriptor::VariableSampleRate; | |
226 variances.sampleRate = m_inputSampleRate / getPreferredStepSize(); | |
227 | |
228 list.push_back(variances); | |
229 | |
230 return list; | |
231 } | |
232 | |
233 SimilarityPlugin::FeatureSet | |
234 SimilarityPlugin::process(const float *const *inputBuffers, Vamp::RealTime /* timestamp */) | |
235 { | |
236 double *dblbuf = new double[m_blockSize]; | |
237 double *decbuf = dblbuf; | |
238 if (m_decimator) decbuf = new double[m_mfcc->getfftlength()]; | |
239 double *ceps = new double[m_K]; | |
240 | |
241 for (size_t c = 0; c < m_channels; ++c) { | |
242 | |
243 for (int i = 0; i < m_blockSize; ++i) { | |
244 dblbuf[i] = inputBuffers[c][i]; | |
245 } | |
246 | |
247 if (m_decimator) { | |
248 m_decimator->process(dblbuf, decbuf); | |
249 } | |
250 | |
251 m_mfcc->process(m_mfcc->getfftlength(), decbuf, ceps); | |
252 | |
253 MFCCFeature mf(m_K); | |
254 for (int i = 0; i < m_K; ++i) mf[i] = ceps[i]; | |
255 | |
256 m_mfeatures[c].push_back(mf); | |
257 } | |
258 | |
259 if (m_decimator) delete[] decbuf; | |
260 delete[] dblbuf; | |
261 delete[] ceps; | |
262 | |
263 return FeatureSet(); | |
264 } | |
265 | |
266 SimilarityPlugin::FeatureSet | |
267 SimilarityPlugin::getRemainingFeatures() | |
268 { | |
269 std::vector<MFCCFeature> m(m_channels); | |
270 std::vector<MFCCFeature> v(m_channels); | |
271 | |
272 //!!! bail if m_mfeatures vectors are empty | |
273 | |
274 for (int i = 0; i < m_channels; ++i) { | |
275 | |
276 MFCCFeature mean(m_K), variance(m_K); | |
277 | |
278 for (int j = 0; j < m_K; ++j) { | |
279 | |
280 mean[j] = variance[j] = 0.0; | |
281 int count; | |
282 | |
283 count = 0; | |
284 for (int k = 0; k < m_mfeatures[i].size(); ++k) { | |
285 double val = m_mfeatures[i][k][j]; | |
286 // std::cout << "val = " << val << std::endl; | |
287 if (isnan(val) || isinf(val)) continue; | |
288 mean[j] += val; | |
289 // std::cout << "mean now = " << mean[j] << std::endl; | |
290 ++count; | |
291 } | |
292 if (count > 0) mean[j] /= count; | |
293 // std::cout << "divided by " << count << ", mean now " << mean[j] << std::endl; | |
294 | |
295 count = 0; | |
296 for (int k = 0; k < m_mfeatures[i].size(); ++k) { | |
297 double val = ((m_mfeatures[i][k][j] - mean[j]) * | |
298 (m_mfeatures[i][k][j] - mean[j])); | |
299 if (isnan(val) || isinf(val)) continue; | |
300 variance[j] += val; | |
301 ++count; | |
302 } | |
303 if (count > 0) variance[j] /= count; | |
304 } | |
305 | |
306 m[i] = mean; | |
307 v[i] = variance; | |
308 } | |
309 | |
310 // std::cout << "m[0][0] = " << m[0][0] << std::endl; | |
311 | |
312 // so we sorta return a matrix of the distances between channels, | |
313 // but Vamp doesn't have a matrix return type so we actually | |
314 // return a series of vectors | |
315 | |
316 std::vector<std::vector<double> > distances; | |
317 | |
318 for (int i = 0; i < m_channels; ++i) { | |
319 distances.push_back(std::vector<double>()); | |
320 for (int j = 0; j < m_channels; ++j) { | |
321 double d = -2.0 * m_K; | |
322 for (int k = 0; k < m_K; ++k) { | |
323 // m[i][k] is the mean of mfcc k for channel i | |
324 // v[i][k] is the variance of mfcc k for channel i | |
325 d += v[i][k] / v[j][k] + v[j][k] / v[i][k]; | |
326 d += (m[i][k] - m[j][k]) | |
327 * (1.0 / v[i][k] + 1.0 / v[j][k]) | |
328 * (m[i][k] - m[j][k]); | |
329 } | |
330 d /= 2.0; | |
331 distances[i].push_back(d); | |
332 } | |
333 } | |
334 | |
335 FeatureSet returnFeatures; | |
336 | |
337 for (int i = 0; i < m_channels; ++i) { | |
338 | |
339 Feature feature; | |
340 feature.hasTimestamp = true; // otherwise hosts will tend to stamp them at the end of the file, which is annoying | |
341 feature.timestamp = Vamp::RealTime(i, 0); | |
342 | |
343 feature.values.clear(); | |
344 for (int k = 0; k < m_K; ++k) { | |
345 feature.values.push_back(m[i][k]); | |
346 } | |
347 | |
348 returnFeatures[1].push_back(feature); | |
349 | |
350 feature.values.clear(); | |
351 for (int k = 0; k < m_K; ++k) { | |
352 feature.values.push_back(v[i][k]); | |
353 } | |
354 | |
355 returnFeatures[2].push_back(feature); | |
356 | |
357 feature.values.clear(); | |
358 for (int j = 0; j < m_channels; ++j) { | |
359 feature.values.push_back(distances[i][j]); | |
360 } | |
361 ostringstream oss; | |
362 oss << "Distance from " << (i + 1); | |
363 feature.label = oss.str(); | |
364 | |
365 returnFeatures[0].push_back(feature); | |
366 } | |
367 | |
368 return returnFeatures; | |
369 } |