c@45
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@45
|
2
|
c@45
|
3 /*
|
c@45
|
4 QM Vamp Plugin Set
|
c@45
|
5
|
c@45
|
6 Centre for Digital Music, Queen Mary, University of London.
|
c@135
|
7
|
c@135
|
8 This program is free software; you can redistribute it and/or
|
c@135
|
9 modify it under the terms of the GNU General Public License as
|
c@135
|
10 published by the Free Software Foundation; either version 2 of the
|
c@135
|
11 License, or (at your option) any later version. See the file
|
c@135
|
12 COPYING included with this distribution for more information.
|
c@45
|
13 */
|
c@45
|
14
|
c@45
|
15 #include "MFCCPlugin.h"
|
c@45
|
16
|
c@45
|
17 #include <dsp/mfcc/MFCC.h>
|
c@130
|
18 #include <maths/MathUtilities.h>
|
c@54
|
19
|
c@45
|
20 using std::string;
|
c@45
|
21 using std::vector;
|
c@45
|
22 using std::cerr;
|
c@45
|
23 using std::endl;
|
c@45
|
24
|
c@45
|
25 MFCCPlugin::MFCCPlugin(float inputSampleRate) :
|
c@45
|
26 Vamp::Plugin(inputSampleRate),
|
c@45
|
27 m_config(lrintf(inputSampleRate)),
|
c@45
|
28 m_mfcc(0),
|
c@45
|
29 m_step(1024),
|
c@60
|
30 m_block(2048),
|
c@60
|
31 m_count(0)
|
c@45
|
32 {
|
c@45
|
33 m_bins = 20;
|
c@45
|
34 m_wantC0 = true;
|
c@45
|
35 m_logpower = 1;
|
c@45
|
36
|
c@45
|
37 setupConfig();
|
c@45
|
38 }
|
c@45
|
39
|
c@45
|
40 void
|
c@45
|
41 MFCCPlugin::setupConfig()
|
c@45
|
42 {
|
c@45
|
43 m_config.FS = lrintf(m_inputSampleRate);
|
c@45
|
44 m_config.fftsize = m_block;
|
c@45
|
45 m_config.nceps = (m_wantC0 ? m_bins-1 : m_bins);
|
c@45
|
46 m_config.want_c0 = m_wantC0;
|
c@45
|
47 m_config.logpower = m_logpower;
|
c@45
|
48 }
|
c@45
|
49
|
c@45
|
50 MFCCPlugin::~MFCCPlugin()
|
c@45
|
51 {
|
c@45
|
52 delete m_mfcc;
|
c@45
|
53 }
|
c@45
|
54
|
c@45
|
55 string
|
c@45
|
56 MFCCPlugin::getIdentifier() const
|
c@45
|
57 {
|
c@45
|
58 return "qm-mfcc";
|
c@45
|
59 }
|
c@45
|
60
|
c@45
|
61 string
|
c@45
|
62 MFCCPlugin::getName() const
|
c@45
|
63 {
|
c@45
|
64 return "Mel-Frequency Cepstral Coefficients";
|
c@45
|
65 }
|
c@45
|
66
|
c@45
|
67 string
|
c@45
|
68 MFCCPlugin::getDescription() const
|
c@45
|
69 {
|
c@50
|
70 return "Calculate a series of MFCC vectors from the audio";
|
c@45
|
71 }
|
c@45
|
72
|
c@45
|
73 string
|
c@45
|
74 MFCCPlugin::getMaker() const
|
c@45
|
75 {
|
c@45
|
76 return "Queen Mary, University of London";
|
c@45
|
77 }
|
c@45
|
78
|
c@45
|
79 int
|
c@45
|
80 MFCCPlugin::getPluginVersion() const
|
c@45
|
81 {
|
c@45
|
82 return 1;
|
c@45
|
83 }
|
c@45
|
84
|
c@45
|
85 string
|
c@45
|
86 MFCCPlugin::getCopyright() const
|
c@45
|
87 {
|
c@118
|
88 return "Plugin by Nicolas Chetry and Chris Cannam. Copyright (c) 2009 QMUL - All Rights Reserved";
|
c@45
|
89 }
|
c@45
|
90
|
c@45
|
91 MFCCPlugin::ParameterList
|
c@45
|
92 MFCCPlugin::getParameterDescriptors() const
|
c@45
|
93 {
|
c@45
|
94 ParameterList list;
|
c@45
|
95
|
c@45
|
96 ParameterDescriptor desc;
|
c@45
|
97 desc.identifier = "nceps";
|
c@45
|
98 desc.name = "Number of Coefficients";
|
c@45
|
99 desc.unit = "";
|
c@52
|
100 desc.description = "Number of MFCCs to return, starting from C0 if \"Include C0\" is specified or from C1 otherwise";
|
c@45
|
101 desc.minValue = 1;
|
c@45
|
102 desc.maxValue = 40;
|
c@45
|
103 desc.defaultValue = 20;
|
c@45
|
104 desc.isQuantized = true;
|
c@45
|
105 desc.quantizeStep = 1;
|
c@45
|
106 list.push_back(desc);
|
c@45
|
107
|
c@45
|
108 desc.identifier = "logpower";
|
c@45
|
109 desc.name = "Power for Mel Amplitude Logs";
|
c@45
|
110 desc.unit = "";
|
c@52
|
111 desc.description = "Power to raise the amplitude log values to before applying DCT. Values greater than 1 may reduce contribution of noise";
|
c@45
|
112 desc.minValue = 0;
|
c@45
|
113 desc.maxValue = 5;
|
c@45
|
114 desc.defaultValue = 1;
|
c@45
|
115 desc.isQuantized = false;
|
c@45
|
116 desc.quantizeStep = 0;
|
c@45
|
117 list.push_back(desc);
|
c@45
|
118
|
c@45
|
119 desc.identifier = "wantc0";
|
c@45
|
120 desc.name = "Include C0";
|
c@45
|
121 desc.unit = "";
|
c@52
|
122 desc.description = "Whether to include the C0 (energy level) coefficient in the returned results";
|
c@45
|
123 desc.minValue = 0;
|
c@45
|
124 desc.maxValue = 1;
|
c@45
|
125 desc.defaultValue = 1;
|
c@45
|
126 desc.isQuantized = true;
|
c@45
|
127 desc.quantizeStep = 1;
|
c@45
|
128 list.push_back(desc);
|
c@45
|
129
|
c@45
|
130 return list;
|
c@45
|
131 }
|
c@45
|
132
|
c@45
|
133 float
|
c@45
|
134 MFCCPlugin::getParameter(std::string param) const
|
c@45
|
135 {
|
c@45
|
136 if (param == "nceps") {
|
c@45
|
137 return m_bins;
|
c@45
|
138 }
|
c@45
|
139 if (param == "logpower") {
|
c@45
|
140 return m_logpower;
|
c@45
|
141 }
|
c@45
|
142 if (param == "wantc0") {
|
c@45
|
143 return m_wantC0 ? 1 : 0;
|
c@45
|
144 }
|
c@45
|
145 std::cerr << "WARNING: MFCCPlugin::getParameter: unknown parameter \""
|
c@45
|
146 << param << "\"" << std::endl;
|
c@45
|
147 return 0.0;
|
c@45
|
148 }
|
c@45
|
149
|
c@45
|
150 void
|
c@45
|
151 MFCCPlugin::setParameter(std::string param, float value)
|
c@45
|
152 {
|
c@45
|
153 if (param == "nceps") {
|
c@45
|
154 m_bins = lrintf(value);
|
c@45
|
155 } else if (param == "logpower") {
|
c@45
|
156 m_logpower = lrintf(value);
|
c@45
|
157 } else if (param == "wantc0") {
|
c@45
|
158 m_wantC0 = (value > 0.5);
|
c@45
|
159 } else {
|
c@45
|
160 std::cerr << "WARNING: MFCCPlugin::setParameter: unknown parameter \""
|
c@45
|
161 << param << "\"" << std::endl;
|
c@45
|
162 }
|
c@45
|
163
|
c@45
|
164 setupConfig();
|
c@45
|
165 }
|
c@45
|
166
|
c@45
|
167 bool
|
c@45
|
168 MFCCPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
c@45
|
169 {
|
c@45
|
170 if (m_mfcc) {
|
c@45
|
171 delete m_mfcc;
|
c@45
|
172 m_mfcc = 0;
|
c@45
|
173 }
|
c@45
|
174
|
c@45
|
175 if (channels < getMinChannelCount() ||
|
c@45
|
176 channels > getMaxChannelCount()) return false;
|
c@45
|
177
|
c@95
|
178 // std::cerr << "MFCCPlugin::initialise: step " << stepSize << ", block "
|
c@95
|
179 // << blockSize << std::endl;
|
c@45
|
180
|
c@45
|
181 m_step = stepSize;
|
c@45
|
182 m_block = blockSize;
|
c@45
|
183 setupConfig();
|
c@45
|
184
|
c@45
|
185 m_mfcc = new MFCC(m_config);
|
c@45
|
186
|
c@45
|
187 m_binsums = vector<double>(m_bins);
|
c@45
|
188 for (int i = 0; i < m_bins; ++i) {
|
c@45
|
189 m_binsums[i] = 0.0;
|
c@45
|
190 }
|
c@45
|
191
|
c@45
|
192 return true;
|
c@45
|
193 }
|
c@45
|
194
|
c@45
|
195 void
|
c@45
|
196 MFCCPlugin::reset()
|
c@45
|
197 {
|
c@45
|
198 if (m_mfcc) {
|
c@45
|
199 delete m_mfcc;
|
c@45
|
200 m_mfcc = new MFCC(m_config);
|
c@45
|
201 for (int i = 0; i < m_bins; ++i) {
|
c@45
|
202 m_binsums[i] = 0.0;
|
c@45
|
203 }
|
c@45
|
204 }
|
c@60
|
205 m_count = 0;
|
c@45
|
206 }
|
c@45
|
207
|
c@45
|
208 size_t
|
c@45
|
209 MFCCPlugin::getPreferredStepSize() const
|
c@45
|
210 {
|
c@45
|
211 return 1024;
|
c@45
|
212 }
|
c@45
|
213
|
c@45
|
214 size_t
|
c@45
|
215 MFCCPlugin::getPreferredBlockSize() const
|
c@45
|
216 {
|
c@45
|
217 return 2048;
|
c@45
|
218 }
|
c@45
|
219
|
c@45
|
220 MFCCPlugin::OutputList
|
c@45
|
221 MFCCPlugin::getOutputDescriptors() const
|
c@45
|
222 {
|
c@45
|
223 OutputList list;
|
c@45
|
224
|
c@45
|
225 OutputDescriptor d;
|
c@45
|
226 d.identifier = "coefficients";
|
c@45
|
227 d.name = "Coefficients";
|
c@45
|
228 d.unit = "";
|
c@52
|
229 d.description = "MFCC values";
|
c@45
|
230 d.hasFixedBinCount = true;
|
c@45
|
231 d.binCount = m_bins;
|
c@45
|
232 d.hasKnownExtents = false;
|
c@45
|
233 d.isQuantized = false;
|
c@45
|
234 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
c@45
|
235 list.push_back(d);
|
c@45
|
236
|
c@45
|
237 d.identifier = "means";
|
c@45
|
238 d.name = "Means of Coefficients";
|
c@52
|
239 d.description = "Mean values of MFCCs across duration of audio input";
|
c@45
|
240 d.sampleType = OutputDescriptor::FixedSampleRate;
|
c@45
|
241 d.sampleRate = 1;
|
c@45
|
242 list.push_back(d);
|
c@45
|
243
|
c@45
|
244 return list;
|
c@45
|
245 }
|
c@45
|
246
|
c@45
|
247 MFCCPlugin::FeatureSet
|
c@45
|
248 MFCCPlugin::process(const float *const *inputBuffers,
|
c@45
|
249 Vamp::RealTime /* timestamp */)
|
c@45
|
250 {
|
c@45
|
251 if (!m_mfcc) {
|
c@45
|
252 cerr << "ERROR: MFCCPlugin::process: "
|
c@45
|
253 << "MFCC has not been initialised"
|
c@45
|
254 << endl;
|
c@45
|
255 return FeatureSet();
|
c@45
|
256 }
|
c@45
|
257
|
c@45
|
258 double *real = new double[m_block];
|
c@45
|
259 double *imag = new double[m_block];
|
c@45
|
260
|
c@75
|
261 for (size_t i = 0; i <= m_block/2; ++i) {
|
c@45
|
262 real[i] = inputBuffers[0][i*2];
|
c@45
|
263 if (i > 0) real[m_block - i] = real[i];
|
c@45
|
264 imag[i] = inputBuffers[0][i*2+1];
|
c@45
|
265 if (i > 0) imag[m_block - i] = imag[i];
|
c@45
|
266 }
|
c@45
|
267
|
c@45
|
268 double *output = new double[m_bins];
|
c@45
|
269
|
c@45
|
270 m_mfcc->process(real, imag, output);
|
c@45
|
271
|
c@45
|
272 delete[] real;
|
c@45
|
273 delete[] imag;
|
c@45
|
274
|
c@45
|
275 Feature feature;
|
c@45
|
276 feature.hasTimestamp = false;
|
c@178
|
277 for (int i = 0; i < m_bins; ++i) {
|
c@45
|
278 double value = output[i];
|
c@130
|
279 if (ISNAN(value)) value = 0.0;
|
c@45
|
280 m_binsums[i] += value;
|
c@45
|
281 feature.values.push_back(value);
|
c@45
|
282 }
|
c@45
|
283 feature.label = "";
|
c@45
|
284 ++m_count;
|
c@45
|
285
|
c@95
|
286 delete[] output;
|
c@95
|
287
|
c@45
|
288 FeatureSet returnFeatures;
|
c@45
|
289 returnFeatures[0].push_back(feature);
|
c@45
|
290 return returnFeatures;
|
c@45
|
291 }
|
c@45
|
292
|
c@45
|
293 MFCCPlugin::FeatureSet
|
c@45
|
294 MFCCPlugin::getRemainingFeatures()
|
c@45
|
295 {
|
c@45
|
296 Feature feature;
|
c@45
|
297 feature.hasTimestamp = true;
|
c@45
|
298 feature.timestamp = Vamp::RealTime::zeroTime;
|
c@45
|
299
|
c@178
|
300 for (int i = 0; i < m_bins; ++i) {
|
c@45
|
301 double v = m_binsums[i];
|
c@45
|
302 if (m_count > 0) v /= m_count;
|
c@45
|
303 feature.values.push_back(v);
|
c@45
|
304 }
|
c@45
|
305 feature.label = "Coefficient means";
|
c@45
|
306
|
c@45
|
307 FeatureSet returnFeatures;
|
c@45
|
308 returnFeatures[1].push_back(feature);
|
c@45
|
309 return returnFeatures;
|
c@45
|
310 }
|
c@45
|
311
|