comparison plugins/SimilarityPlugin.cpp @ 41:b9fb6dee85f7

* Add similarity plugin
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 11 Jan 2008 18:18:45 +0000
parents
children 0f85778f1b53
comparison
equal deleted inserted replaced
40:77e394a5f3c9 41:b9fb6dee85f7
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 * SimilarityPlugin.cpp
5 *
6 * Copyright 2008 Centre for Digital Music, Queen Mary, University of London.
7 * All rights reserved.
8 */
9
10 #include <iostream>
11 #include <sstream>
12
13 #include "SimilarityPlugin.h"
14 #include "dsp/mfcc/MFCC.h"
15 #include "dsp/rateconversion/Decimator.h"
16
17 using std::string;
18 using std::vector;
19 using std::cerr;
20 using std::endl;
21 using std::ostringstream;
22
23 SimilarityPlugin::SimilarityPlugin(float inputSampleRate) :
24 Plugin(inputSampleRate),
25 m_mfcc(0),
26 m_decimator(0),
27 m_K(20),
28 m_blockSize(0),
29 m_channels(0)
30 {
31
32 }
33
34 SimilarityPlugin::~SimilarityPlugin()
35 {
36 delete m_mfcc;
37 delete m_decimator;
38 }
39
40 string
41 SimilarityPlugin::getIdentifier() const
42 {
43 return "qm-similarity";
44 }
45
46 string
47 SimilarityPlugin::getName() const
48 {
49 return "Similarity";
50 }
51
52 string
53 SimilarityPlugin::getDescription() const
54 {
55 return "Return a distance metric for overall timbral similarity between the input audio channels";
56 }
57
58 string
59 SimilarityPlugin::getMaker() const
60 {
61 return "Chris Cannam, Queen Mary, University of London";
62 }
63
64 int
65 SimilarityPlugin::getPluginVersion() const
66 {
67 return 1;
68 }
69
70 string
71 SimilarityPlugin::getCopyright() const
72 {
73 return "Copyright (c) 2008 - All Rights Reserved";
74 }
75
76 size_t
77 SimilarityPlugin::getMinChannelCount() const
78 {
79 return 2;
80 }
81
82 size_t
83 SimilarityPlugin::getMaxChannelCount() const
84 {
85 return 1024;
86 }
87
88 bool
89 SimilarityPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
90 {
91 if (channels < getMinChannelCount() ||
92 channels > getMaxChannelCount()) return false;
93
94 if (stepSize != getPreferredStepSize()) {
95 std::cerr << "SimilarityPlugin::initialise: supplied step size "
96 << stepSize << " differs from required step size "
97 << getPreferredStepSize() << std::endl;
98 return false;
99 }
100
101 if (blockSize != getPreferredBlockSize()) {
102 std::cerr << "SimilarityPlugin::initialise: supplied block size "
103 << blockSize << " differs from required block size "
104 << getPreferredBlockSize() << std::endl;
105 return false;
106 }
107
108 m_blockSize = blockSize;
109 m_channels = channels;
110
111 int decimationFactor = getDecimationFactor();
112 if (decimationFactor > 1) {
113 m_decimator = new Decimator(getPreferredBlockSize(), decimationFactor);
114 }
115
116 MFCCConfig config;
117 config.FS = lrintf(m_inputSampleRate) / decimationFactor;
118 config.fftsize = 2048;
119 config.nceps = m_K - 1;
120 config.want_c0 = true;
121 m_mfcc = new MFCC(config);
122
123 for (int i = 0; i < m_channels; ++i) {
124 m_mfeatures.push_back(MFCCFeatureVector());
125 }
126
127 return true;
128 }
129
130 void
131 SimilarityPlugin::reset()
132 {
133 //!!!
134 }
135
136 int
137 SimilarityPlugin::getDecimationFactor() const
138 {
139 int rate = lrintf(m_inputSampleRate);
140 int internalRate = 22050;
141 int decimationFactor = rate / internalRate;
142 if (decimationFactor < 1) decimationFactor = 1;
143
144 // must be a power of two
145 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
146
147 return decimationFactor;
148 }
149
150 size_t
151 SimilarityPlugin::getPreferredStepSize() const
152 {
153 return 1024 * getDecimationFactor();
154 }
155
156 size_t
157 SimilarityPlugin::getPreferredBlockSize() const
158 {
159 return 2048 * getDecimationFactor();
160 }
161
162 SimilarityPlugin::ParameterList SimilarityPlugin::getParameterDescriptors() const
163 {
164 ParameterList list;
165 return list;
166 }
167
168 float
169 SimilarityPlugin::getParameter(std::string param) const
170 {
171 std::cerr << "WARNING: SimilarityPlugin::getParameter: unknown parameter \""
172 << param << "\"" << std::endl;
173 return 0.0;
174 }
175
176 void
177 SimilarityPlugin::setParameter(std::string param, float value)
178 {
179 std::cerr << "WARNING: SimilarityPlugin::setParameter: unknown parameter \""
180 << param << "\"" << std::endl;
181 }
182
183 SimilarityPlugin::OutputList
184 SimilarityPlugin::getOutputDescriptors() const
185 {
186 OutputList list;
187
188 OutputDescriptor similarity;
189 similarity.identifier = "distance";
190 similarity.name = "Distance";
191 similarity.description = "Distance Metric for Timbral Similarity (smaller = more similar)";
192 similarity.unit = "";
193 similarity.hasFixedBinCount = true;
194 similarity.binCount = m_channels;
195 similarity.hasKnownExtents = false;
196 similarity.isQuantized = false;
197 similarity.sampleType = OutputDescriptor::FixedSampleRate;
198 similarity.sampleRate = 1;
199
200 list.push_back(similarity);
201
202 OutputDescriptor means;
203 means.identifier = "means";
204 means.name = "MFCC Means";
205 means.description = "";
206 means.unit = "";
207 means.hasFixedBinCount = true;
208 means.binCount = m_channels;
209 means.hasKnownExtents = false;
210 means.isQuantized = false;
211 means.sampleType = OutputDescriptor::VariableSampleRate;
212 means.sampleRate = m_inputSampleRate / getPreferredStepSize();
213
214 list.push_back(means);
215
216 OutputDescriptor variances;
217 variances.identifier = "variances";
218 variances.name = "MFCC Variances";
219 variances.description = "";
220 variances.unit = "";
221 variances.hasFixedBinCount = true;
222 variances.binCount = m_channels;
223 variances.hasKnownExtents = false;
224 variances.isQuantized = false;
225 variances.sampleType = OutputDescriptor::VariableSampleRate;
226 variances.sampleRate = m_inputSampleRate / getPreferredStepSize();
227
228 list.push_back(variances);
229
230 return list;
231 }
232
233 SimilarityPlugin::FeatureSet
234 SimilarityPlugin::process(const float *const *inputBuffers, Vamp::RealTime /* timestamp */)
235 {
236 double *dblbuf = new double[m_blockSize];
237 double *decbuf = dblbuf;
238 if (m_decimator) decbuf = new double[m_mfcc->getfftlength()];
239 double *ceps = new double[m_K];
240
241 for (size_t c = 0; c < m_channels; ++c) {
242
243 for (int i = 0; i < m_blockSize; ++i) {
244 dblbuf[i] = inputBuffers[c][i];
245 }
246
247 if (m_decimator) {
248 m_decimator->process(dblbuf, decbuf);
249 }
250
251 m_mfcc->process(m_mfcc->getfftlength(), decbuf, ceps);
252
253 MFCCFeature mf(m_K);
254 for (int i = 0; i < m_K; ++i) mf[i] = ceps[i];
255
256 m_mfeatures[c].push_back(mf);
257 }
258
259 if (m_decimator) delete[] decbuf;
260 delete[] dblbuf;
261 delete[] ceps;
262
263 return FeatureSet();
264 }
265
266 SimilarityPlugin::FeatureSet
267 SimilarityPlugin::getRemainingFeatures()
268 {
269 std::vector<MFCCFeature> m(m_channels);
270 std::vector<MFCCFeature> v(m_channels);
271
272 //!!! bail if m_mfeatures vectors are empty
273
274 for (int i = 0; i < m_channels; ++i) {
275
276 MFCCFeature mean(m_K), variance(m_K);
277
278 for (int j = 0; j < m_K; ++j) {
279
280 mean[j] = variance[j] = 0.0;
281 int count;
282
283 count = 0;
284 for (int k = 0; k < m_mfeatures[i].size(); ++k) {
285 double val = m_mfeatures[i][k][j];
286 // std::cout << "val = " << val << std::endl;
287 if (isnan(val) || isinf(val)) continue;
288 mean[j] += val;
289 // std::cout << "mean now = " << mean[j] << std::endl;
290 ++count;
291 }
292 if (count > 0) mean[j] /= count;
293 // std::cout << "divided by " << count << ", mean now " << mean[j] << std::endl;
294
295 count = 0;
296 for (int k = 0; k < m_mfeatures[i].size(); ++k) {
297 double val = ((m_mfeatures[i][k][j] - mean[j]) *
298 (m_mfeatures[i][k][j] - mean[j]));
299 if (isnan(val) || isinf(val)) continue;
300 variance[j] += val;
301 ++count;
302 }
303 if (count > 0) variance[j] /= count;
304 }
305
306 m[i] = mean;
307 v[i] = variance;
308 }
309
310 // std::cout << "m[0][0] = " << m[0][0] << std::endl;
311
312 // so we sorta return a matrix of the distances between channels,
313 // but Vamp doesn't have a matrix return type so we actually
314 // return a series of vectors
315
316 std::vector<std::vector<double> > distances;
317
318 for (int i = 0; i < m_channels; ++i) {
319 distances.push_back(std::vector<double>());
320 for (int j = 0; j < m_channels; ++j) {
321 double d = -2.0 * m_K;
322 for (int k = 0; k < m_K; ++k) {
323 // m[i][k] is the mean of mfcc k for channel i
324 // v[i][k] is the variance of mfcc k for channel i
325 d += v[i][k] / v[j][k] + v[j][k] / v[i][k];
326 d += (m[i][k] - m[j][k])
327 * (1.0 / v[i][k] + 1.0 / v[j][k])
328 * (m[i][k] - m[j][k]);
329 }
330 d /= 2.0;
331 distances[i].push_back(d);
332 }
333 }
334
335 FeatureSet returnFeatures;
336
337 for (int i = 0; i < m_channels; ++i) {
338
339 Feature feature;
340 feature.hasTimestamp = true; // otherwise hosts will tend to stamp them at the end of the file, which is annoying
341 feature.timestamp = Vamp::RealTime(i, 0);
342
343 feature.values.clear();
344 for (int k = 0; k < m_K; ++k) {
345 feature.values.push_back(m[i][k]);
346 }
347
348 returnFeatures[1].push_back(feature);
349
350 feature.values.clear();
351 for (int k = 0; k < m_K; ++k) {
352 feature.values.push_back(v[i][k]);
353 }
354
355 returnFeatures[2].push_back(feature);
356
357 feature.values.clear();
358 for (int j = 0; j < m_channels; ++j) {
359 feature.values.push_back(distances[i][j]);
360 }
361 ostringstream oss;
362 oss << "Distance from " << (i + 1);
363 feature.label = oss.str();
364
365 returnFeatures[0].push_back(feature);
366 }
367
368 return returnFeatures;
369 }