c@45
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@45
|
2
|
c@45
|
3 /*
|
c@45
|
4 QM Vamp Plugin Set
|
c@45
|
5
|
c@45
|
6 Centre for Digital Music, Queen Mary, University of London.
|
c@45
|
7 All rights reserved.
|
c@45
|
8 */
|
c@45
|
9
|
c@45
|
10 #include "MFCCPlugin.h"
|
c@45
|
11
|
c@45
|
12 #include <dsp/mfcc/MFCC.h>
|
c@130
|
13 #include <maths/MathUtilities.h>
|
c@54
|
14
|
c@45
|
15 using std::string;
|
c@45
|
16 using std::vector;
|
c@45
|
17 using std::cerr;
|
c@45
|
18 using std::endl;
|
c@45
|
19
|
c@45
|
20 MFCCPlugin::MFCCPlugin(float inputSampleRate) :
|
c@45
|
21 Vamp::Plugin(inputSampleRate),
|
c@45
|
22 m_config(lrintf(inputSampleRate)),
|
c@45
|
23 m_mfcc(0),
|
c@45
|
24 m_step(1024),
|
c@60
|
25 m_block(2048),
|
c@60
|
26 m_count(0)
|
c@45
|
27 {
|
c@45
|
28 m_bins = 20;
|
c@45
|
29 m_wantC0 = true;
|
c@45
|
30 m_logpower = 1;
|
c@45
|
31
|
c@45
|
32 setupConfig();
|
c@45
|
33 }
|
c@45
|
34
|
c@45
|
35 void
|
c@45
|
36 MFCCPlugin::setupConfig()
|
c@45
|
37 {
|
c@45
|
38 m_config.FS = lrintf(m_inputSampleRate);
|
c@45
|
39 m_config.fftsize = m_block;
|
c@45
|
40 m_config.nceps = (m_wantC0 ? m_bins-1 : m_bins);
|
c@45
|
41 m_config.want_c0 = m_wantC0;
|
c@45
|
42 m_config.logpower = m_logpower;
|
c@45
|
43 }
|
c@45
|
44
|
c@45
|
45 MFCCPlugin::~MFCCPlugin()
|
c@45
|
46 {
|
c@45
|
47 delete m_mfcc;
|
c@45
|
48 }
|
c@45
|
49
|
c@45
|
50 string
|
c@45
|
51 MFCCPlugin::getIdentifier() const
|
c@45
|
52 {
|
c@45
|
53 return "qm-mfcc";
|
c@45
|
54 }
|
c@45
|
55
|
c@45
|
56 string
|
c@45
|
57 MFCCPlugin::getName() const
|
c@45
|
58 {
|
c@45
|
59 return "Mel-Frequency Cepstral Coefficients";
|
c@45
|
60 }
|
c@45
|
61
|
c@45
|
62 string
|
c@45
|
63 MFCCPlugin::getDescription() const
|
c@45
|
64 {
|
c@50
|
65 return "Calculate a series of MFCC vectors from the audio";
|
c@45
|
66 }
|
c@45
|
67
|
c@45
|
68 string
|
c@45
|
69 MFCCPlugin::getMaker() const
|
c@45
|
70 {
|
c@45
|
71 return "Queen Mary, University of London";
|
c@45
|
72 }
|
c@45
|
73
|
c@45
|
74 int
|
c@45
|
75 MFCCPlugin::getPluginVersion() const
|
c@45
|
76 {
|
c@45
|
77 return 1;
|
c@45
|
78 }
|
c@45
|
79
|
c@45
|
80 string
|
c@45
|
81 MFCCPlugin::getCopyright() const
|
c@45
|
82 {
|
c@118
|
83 return "Plugin by Nicolas Chetry and Chris Cannam. Copyright (c) 2009 QMUL - All Rights Reserved";
|
c@45
|
84 }
|
c@45
|
85
|
c@45
|
86 MFCCPlugin::ParameterList
|
c@45
|
87 MFCCPlugin::getParameterDescriptors() const
|
c@45
|
88 {
|
c@45
|
89 ParameterList list;
|
c@45
|
90
|
c@45
|
91 ParameterDescriptor desc;
|
c@45
|
92 desc.identifier = "nceps";
|
c@45
|
93 desc.name = "Number of Coefficients";
|
c@45
|
94 desc.unit = "";
|
c@52
|
95 desc.description = "Number of MFCCs to return, starting from C0 if \"Include C0\" is specified or from C1 otherwise";
|
c@45
|
96 desc.minValue = 1;
|
c@45
|
97 desc.maxValue = 40;
|
c@45
|
98 desc.defaultValue = 20;
|
c@45
|
99 desc.isQuantized = true;
|
c@45
|
100 desc.quantizeStep = 1;
|
c@45
|
101 list.push_back(desc);
|
c@45
|
102
|
c@45
|
103 desc.identifier = "logpower";
|
c@45
|
104 desc.name = "Power for Mel Amplitude Logs";
|
c@45
|
105 desc.unit = "";
|
c@52
|
106 desc.description = "Power to raise the amplitude log values to before applying DCT. Values greater than 1 may reduce contribution of noise";
|
c@45
|
107 desc.minValue = 0;
|
c@45
|
108 desc.maxValue = 5;
|
c@45
|
109 desc.defaultValue = 1;
|
c@45
|
110 desc.isQuantized = false;
|
c@45
|
111 desc.quantizeStep = 0;
|
c@45
|
112 list.push_back(desc);
|
c@45
|
113
|
c@45
|
114 desc.identifier = "wantc0";
|
c@45
|
115 desc.name = "Include C0";
|
c@45
|
116 desc.unit = "";
|
c@52
|
117 desc.description = "Whether to include the C0 (energy level) coefficient in the returned results";
|
c@45
|
118 desc.minValue = 0;
|
c@45
|
119 desc.maxValue = 1;
|
c@45
|
120 desc.defaultValue = 1;
|
c@45
|
121 desc.isQuantized = true;
|
c@45
|
122 desc.quantizeStep = 1;
|
c@45
|
123 list.push_back(desc);
|
c@45
|
124
|
c@45
|
125 return list;
|
c@45
|
126 }
|
c@45
|
127
|
c@45
|
128 float
|
c@45
|
129 MFCCPlugin::getParameter(std::string param) const
|
c@45
|
130 {
|
c@45
|
131 if (param == "nceps") {
|
c@45
|
132 return m_bins;
|
c@45
|
133 }
|
c@45
|
134 if (param == "logpower") {
|
c@45
|
135 return m_logpower;
|
c@45
|
136 }
|
c@45
|
137 if (param == "wantc0") {
|
c@45
|
138 return m_wantC0 ? 1 : 0;
|
c@45
|
139 }
|
c@45
|
140 std::cerr << "WARNING: MFCCPlugin::getParameter: unknown parameter \""
|
c@45
|
141 << param << "\"" << std::endl;
|
c@45
|
142 return 0.0;
|
c@45
|
143 }
|
c@45
|
144
|
c@45
|
145 void
|
c@45
|
146 MFCCPlugin::setParameter(std::string param, float value)
|
c@45
|
147 {
|
c@45
|
148 if (param == "nceps") {
|
c@45
|
149 m_bins = lrintf(value);
|
c@45
|
150 } else if (param == "logpower") {
|
c@45
|
151 m_logpower = lrintf(value);
|
c@45
|
152 } else if (param == "wantc0") {
|
c@45
|
153 m_wantC0 = (value > 0.5);
|
c@45
|
154 } else {
|
c@45
|
155 std::cerr << "WARNING: MFCCPlugin::setParameter: unknown parameter \""
|
c@45
|
156 << param << "\"" << std::endl;
|
c@45
|
157 }
|
c@45
|
158
|
c@45
|
159 setupConfig();
|
c@45
|
160 }
|
c@45
|
161
|
c@45
|
162 bool
|
c@45
|
163 MFCCPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
c@45
|
164 {
|
c@45
|
165 if (m_mfcc) {
|
c@45
|
166 delete m_mfcc;
|
c@45
|
167 m_mfcc = 0;
|
c@45
|
168 }
|
c@45
|
169
|
c@45
|
170 if (channels < getMinChannelCount() ||
|
c@45
|
171 channels > getMaxChannelCount()) return false;
|
c@45
|
172
|
c@95
|
173 // std::cerr << "MFCCPlugin::initialise: step " << stepSize << ", block "
|
c@95
|
174 // << blockSize << std::endl;
|
c@45
|
175
|
c@45
|
176 m_step = stepSize;
|
c@45
|
177 m_block = blockSize;
|
c@45
|
178 setupConfig();
|
c@45
|
179
|
c@45
|
180 m_mfcc = new MFCC(m_config);
|
c@45
|
181
|
c@45
|
182 m_binsums = vector<double>(m_bins);
|
c@45
|
183 for (int i = 0; i < m_bins; ++i) {
|
c@45
|
184 m_binsums[i] = 0.0;
|
c@45
|
185 }
|
c@45
|
186
|
c@45
|
187 return true;
|
c@45
|
188 }
|
c@45
|
189
|
c@45
|
190 void
|
c@45
|
191 MFCCPlugin::reset()
|
c@45
|
192 {
|
c@45
|
193 if (m_mfcc) {
|
c@45
|
194 delete m_mfcc;
|
c@45
|
195 m_mfcc = new MFCC(m_config);
|
c@45
|
196 for (int i = 0; i < m_bins; ++i) {
|
c@45
|
197 m_binsums[i] = 0.0;
|
c@45
|
198 }
|
c@45
|
199 }
|
c@60
|
200 m_count = 0;
|
c@45
|
201 }
|
c@45
|
202
|
c@45
|
203 size_t
|
c@45
|
204 MFCCPlugin::getPreferredStepSize() const
|
c@45
|
205 {
|
c@45
|
206 return 1024;
|
c@45
|
207 }
|
c@45
|
208
|
c@45
|
209 size_t
|
c@45
|
210 MFCCPlugin::getPreferredBlockSize() const
|
c@45
|
211 {
|
c@45
|
212 return 2048;
|
c@45
|
213 }
|
c@45
|
214
|
c@45
|
215 MFCCPlugin::OutputList
|
c@45
|
216 MFCCPlugin::getOutputDescriptors() const
|
c@45
|
217 {
|
c@45
|
218 OutputList list;
|
c@45
|
219
|
c@45
|
220 OutputDescriptor d;
|
c@45
|
221 d.identifier = "coefficients";
|
c@45
|
222 d.name = "Coefficients";
|
c@45
|
223 d.unit = "";
|
c@52
|
224 d.description = "MFCC values";
|
c@45
|
225 d.hasFixedBinCount = true;
|
c@45
|
226 d.binCount = m_bins;
|
c@45
|
227 d.hasKnownExtents = false;
|
c@45
|
228 d.isQuantized = false;
|
c@45
|
229 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
c@45
|
230 list.push_back(d);
|
c@45
|
231
|
c@45
|
232 d.identifier = "means";
|
c@45
|
233 d.name = "Means of Coefficients";
|
c@52
|
234 d.description = "Mean values of MFCCs across duration of audio input";
|
c@45
|
235 d.sampleType = OutputDescriptor::FixedSampleRate;
|
c@45
|
236 d.sampleRate = 1;
|
c@45
|
237 list.push_back(d);
|
c@45
|
238
|
c@45
|
239 return list;
|
c@45
|
240 }
|
c@45
|
241
|
c@45
|
242 MFCCPlugin::FeatureSet
|
c@45
|
243 MFCCPlugin::process(const float *const *inputBuffers,
|
c@45
|
244 Vamp::RealTime /* timestamp */)
|
c@45
|
245 {
|
c@45
|
246 if (!m_mfcc) {
|
c@45
|
247 cerr << "ERROR: MFCCPlugin::process: "
|
c@45
|
248 << "MFCC has not been initialised"
|
c@45
|
249 << endl;
|
c@45
|
250 return FeatureSet();
|
c@45
|
251 }
|
c@45
|
252
|
c@45
|
253 double *real = new double[m_block];
|
c@45
|
254 double *imag = new double[m_block];
|
c@45
|
255
|
c@75
|
256 for (size_t i = 0; i <= m_block/2; ++i) {
|
c@45
|
257 real[i] = inputBuffers[0][i*2];
|
c@45
|
258 if (i > 0) real[m_block - i] = real[i];
|
c@45
|
259 imag[i] = inputBuffers[0][i*2+1];
|
c@45
|
260 if (i > 0) imag[m_block - i] = imag[i];
|
c@45
|
261 }
|
c@45
|
262
|
c@45
|
263 double *output = new double[m_bins];
|
c@45
|
264
|
c@45
|
265 m_mfcc->process(real, imag, output);
|
c@45
|
266
|
c@45
|
267 delete[] real;
|
c@45
|
268 delete[] imag;
|
c@45
|
269
|
c@45
|
270 Feature feature;
|
c@45
|
271 feature.hasTimestamp = false;
|
c@45
|
272 for (size_t i = 0; i < m_bins; ++i) {
|
c@45
|
273 double value = output[i];
|
c@130
|
274 if (ISNAN(value)) value = 0.0;
|
c@45
|
275 m_binsums[i] += value;
|
c@45
|
276 feature.values.push_back(value);
|
c@45
|
277 }
|
c@45
|
278 feature.label = "";
|
c@45
|
279 ++m_count;
|
c@45
|
280
|
c@95
|
281 delete[] output;
|
c@95
|
282
|
c@45
|
283 FeatureSet returnFeatures;
|
c@45
|
284 returnFeatures[0].push_back(feature);
|
c@45
|
285 return returnFeatures;
|
c@45
|
286 }
|
c@45
|
287
|
c@45
|
288 MFCCPlugin::FeatureSet
|
c@45
|
289 MFCCPlugin::getRemainingFeatures()
|
c@45
|
290 {
|
c@45
|
291 Feature feature;
|
c@45
|
292 feature.hasTimestamp = true;
|
c@45
|
293 feature.timestamp = Vamp::RealTime::zeroTime;
|
c@45
|
294
|
c@45
|
295 for (size_t i = 0; i < m_bins; ++i) {
|
c@45
|
296 double v = m_binsums[i];
|
c@45
|
297 if (m_count > 0) v /= m_count;
|
c@45
|
298 feature.values.push_back(v);
|
c@45
|
299 }
|
c@45
|
300 feature.label = "Coefficient means";
|
c@45
|
301
|
c@45
|
302 FeatureSet returnFeatures;
|
c@45
|
303 returnFeatures[1].push_back(feature);
|
c@45
|
304 return returnFeatures;
|
c@45
|
305 }
|
c@45
|
306
|