c@45
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@45
|
2
|
c@45
|
3 /*
|
c@45
|
4 QM Vamp Plugin Set
|
c@45
|
5
|
c@45
|
6 Centre for Digital Music, Queen Mary, University of London.
|
c@45
|
7 All rights reserved.
|
c@45
|
8 */
|
c@45
|
9
|
c@45
|
10 #include "MFCCPlugin.h"
|
c@45
|
11
|
c@45
|
12 #include <dsp/mfcc/MFCC.h>
|
c@45
|
13
|
c@54
|
14 #include <cmath>
|
c@54
|
15
|
c@45
|
16 using std::string;
|
c@45
|
17 using std::vector;
|
c@45
|
18 using std::cerr;
|
c@45
|
19 using std::endl;
|
c@45
|
20
|
c@54
|
21 using std::isnan;
|
c@54
|
22
|
c@45
|
23 MFCCPlugin::MFCCPlugin(float inputSampleRate) :
|
c@45
|
24 Vamp::Plugin(inputSampleRate),
|
c@45
|
25 m_config(lrintf(inputSampleRate)),
|
c@45
|
26 m_mfcc(0),
|
c@45
|
27 m_step(1024),
|
c@60
|
28 m_block(2048),
|
c@60
|
29 m_count(0)
|
c@45
|
30 {
|
c@45
|
31 m_bins = 20;
|
c@45
|
32 m_wantC0 = true;
|
c@45
|
33 m_logpower = 1;
|
c@45
|
34
|
c@45
|
35 setupConfig();
|
c@45
|
36 }
|
c@45
|
37
|
c@45
|
38 void
|
c@45
|
39 MFCCPlugin::setupConfig()
|
c@45
|
40 {
|
c@45
|
41 m_config.FS = lrintf(m_inputSampleRate);
|
c@45
|
42 m_config.fftsize = m_block;
|
c@45
|
43 m_config.nceps = (m_wantC0 ? m_bins-1 : m_bins);
|
c@45
|
44 m_config.want_c0 = m_wantC0;
|
c@45
|
45 m_config.logpower = m_logpower;
|
c@45
|
46 }
|
c@45
|
47
|
c@45
|
48 MFCCPlugin::~MFCCPlugin()
|
c@45
|
49 {
|
c@45
|
50 delete m_mfcc;
|
c@45
|
51 }
|
c@45
|
52
|
c@45
|
53 string
|
c@45
|
54 MFCCPlugin::getIdentifier() const
|
c@45
|
55 {
|
c@45
|
56 return "qm-mfcc";
|
c@45
|
57 }
|
c@45
|
58
|
c@45
|
59 string
|
c@45
|
60 MFCCPlugin::getName() const
|
c@45
|
61 {
|
c@45
|
62 return "Mel-Frequency Cepstral Coefficients";
|
c@45
|
63 }
|
c@45
|
64
|
c@45
|
65 string
|
c@45
|
66 MFCCPlugin::getDescription() const
|
c@45
|
67 {
|
c@50
|
68 return "Calculate a series of MFCC vectors from the audio";
|
c@45
|
69 }
|
c@45
|
70
|
c@45
|
71 string
|
c@45
|
72 MFCCPlugin::getMaker() const
|
c@45
|
73 {
|
c@45
|
74 return "Queen Mary, University of London";
|
c@45
|
75 }
|
c@45
|
76
|
c@45
|
77 int
|
c@45
|
78 MFCCPlugin::getPluginVersion() const
|
c@45
|
79 {
|
c@45
|
80 return 1;
|
c@45
|
81 }
|
c@45
|
82
|
c@45
|
83 string
|
c@45
|
84 MFCCPlugin::getCopyright() const
|
c@45
|
85 {
|
c@50
|
86 return "Plugin by Nicolas Chetry and Chris Cannam. Copyright (c) 2008 QMUL - All Rights Reserved";
|
c@45
|
87 }
|
c@45
|
88
|
c@45
|
89 MFCCPlugin::ParameterList
|
c@45
|
90 MFCCPlugin::getParameterDescriptors() const
|
c@45
|
91 {
|
c@45
|
92 ParameterList list;
|
c@45
|
93
|
c@45
|
94 ParameterDescriptor desc;
|
c@45
|
95 desc.identifier = "nceps";
|
c@45
|
96 desc.name = "Number of Coefficients";
|
c@45
|
97 desc.unit = "";
|
c@52
|
98 desc.description = "Number of MFCCs to return, starting from C0 if \"Include C0\" is specified or from C1 otherwise";
|
c@45
|
99 desc.minValue = 1;
|
c@45
|
100 desc.maxValue = 40;
|
c@45
|
101 desc.defaultValue = 20;
|
c@45
|
102 desc.isQuantized = true;
|
c@45
|
103 desc.quantizeStep = 1;
|
c@45
|
104 list.push_back(desc);
|
c@45
|
105
|
c@45
|
106 desc.identifier = "logpower";
|
c@45
|
107 desc.name = "Power for Mel Amplitude Logs";
|
c@45
|
108 desc.unit = "";
|
c@52
|
109 desc.description = "Power to raise the amplitude log values to before applying DCT. Values greater than 1 may reduce contribution of noise";
|
c@45
|
110 desc.minValue = 0;
|
c@45
|
111 desc.maxValue = 5;
|
c@45
|
112 desc.defaultValue = 1;
|
c@45
|
113 desc.isQuantized = false;
|
c@45
|
114 desc.quantizeStep = 0;
|
c@45
|
115 list.push_back(desc);
|
c@45
|
116
|
c@45
|
117 desc.identifier = "wantc0";
|
c@45
|
118 desc.name = "Include C0";
|
c@45
|
119 desc.unit = "";
|
c@52
|
120 desc.description = "Whether to include the C0 (energy level) coefficient in the returned results";
|
c@45
|
121 desc.minValue = 0;
|
c@45
|
122 desc.maxValue = 1;
|
c@45
|
123 desc.defaultValue = 1;
|
c@45
|
124 desc.isQuantized = true;
|
c@45
|
125 desc.quantizeStep = 1;
|
c@45
|
126 list.push_back(desc);
|
c@45
|
127
|
c@45
|
128 return list;
|
c@45
|
129 }
|
c@45
|
130
|
c@45
|
131 float
|
c@45
|
132 MFCCPlugin::getParameter(std::string param) const
|
c@45
|
133 {
|
c@45
|
134 if (param == "nceps") {
|
c@45
|
135 return m_bins;
|
c@45
|
136 }
|
c@45
|
137 if (param == "logpower") {
|
c@45
|
138 return m_logpower;
|
c@45
|
139 }
|
c@45
|
140 if (param == "wantc0") {
|
c@45
|
141 return m_wantC0 ? 1 : 0;
|
c@45
|
142 }
|
c@45
|
143 std::cerr << "WARNING: MFCCPlugin::getParameter: unknown parameter \""
|
c@45
|
144 << param << "\"" << std::endl;
|
c@45
|
145 return 0.0;
|
c@45
|
146 }
|
c@45
|
147
|
c@45
|
148 void
|
c@45
|
149 MFCCPlugin::setParameter(std::string param, float value)
|
c@45
|
150 {
|
c@45
|
151 if (param == "nceps") {
|
c@45
|
152 m_bins = lrintf(value);
|
c@45
|
153 } else if (param == "logpower") {
|
c@45
|
154 m_logpower = lrintf(value);
|
c@45
|
155 } else if (param == "wantc0") {
|
c@45
|
156 m_wantC0 = (value > 0.5);
|
c@45
|
157 } else {
|
c@45
|
158 std::cerr << "WARNING: MFCCPlugin::setParameter: unknown parameter \""
|
c@45
|
159 << param << "\"" << std::endl;
|
c@45
|
160 }
|
c@45
|
161
|
c@45
|
162 setupConfig();
|
c@45
|
163 }
|
c@45
|
164
|
c@45
|
165 bool
|
c@45
|
166 MFCCPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
c@45
|
167 {
|
c@45
|
168 if (m_mfcc) {
|
c@45
|
169 delete m_mfcc;
|
c@45
|
170 m_mfcc = 0;
|
c@45
|
171 }
|
c@45
|
172
|
c@45
|
173 if (channels < getMinChannelCount() ||
|
c@45
|
174 channels > getMaxChannelCount()) return false;
|
c@45
|
175
|
c@45
|
176 std::cerr << "MFCCPlugin::initialise: step " << stepSize << ", block "
|
c@45
|
177 << blockSize << std::endl;
|
c@45
|
178
|
c@45
|
179 m_step = stepSize;
|
c@45
|
180 m_block = blockSize;
|
c@45
|
181 setupConfig();
|
c@45
|
182
|
c@45
|
183 m_mfcc = new MFCC(m_config);
|
c@45
|
184
|
c@45
|
185 m_binsums = vector<double>(m_bins);
|
c@45
|
186 for (int i = 0; i < m_bins; ++i) {
|
c@45
|
187 m_binsums[i] = 0.0;
|
c@45
|
188 }
|
c@45
|
189
|
c@45
|
190 return true;
|
c@45
|
191 }
|
c@45
|
192
|
c@45
|
193 void
|
c@45
|
194 MFCCPlugin::reset()
|
c@45
|
195 {
|
c@45
|
196 if (m_mfcc) {
|
c@45
|
197 delete m_mfcc;
|
c@45
|
198 m_mfcc = new MFCC(m_config);
|
c@45
|
199 for (int i = 0; i < m_bins; ++i) {
|
c@45
|
200 m_binsums[i] = 0.0;
|
c@45
|
201 }
|
c@45
|
202 }
|
c@60
|
203 m_count = 0;
|
c@45
|
204 }
|
c@45
|
205
|
c@45
|
206 size_t
|
c@45
|
207 MFCCPlugin::getPreferredStepSize() const
|
c@45
|
208 {
|
c@45
|
209 return 1024;
|
c@45
|
210 }
|
c@45
|
211
|
c@45
|
212 size_t
|
c@45
|
213 MFCCPlugin::getPreferredBlockSize() const
|
c@45
|
214 {
|
c@45
|
215 return 2048;
|
c@45
|
216 }
|
c@45
|
217
|
c@45
|
218 MFCCPlugin::OutputList
|
c@45
|
219 MFCCPlugin::getOutputDescriptors() const
|
c@45
|
220 {
|
c@45
|
221 OutputList list;
|
c@45
|
222
|
c@45
|
223 OutputDescriptor d;
|
c@45
|
224 d.identifier = "coefficients";
|
c@45
|
225 d.name = "Coefficients";
|
c@45
|
226 d.unit = "";
|
c@52
|
227 d.description = "MFCC values";
|
c@45
|
228 d.hasFixedBinCount = true;
|
c@45
|
229 d.binCount = m_bins;
|
c@45
|
230 d.hasKnownExtents = false;
|
c@45
|
231 d.isQuantized = false;
|
c@45
|
232 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
c@45
|
233 list.push_back(d);
|
c@45
|
234
|
c@45
|
235 d.identifier = "means";
|
c@45
|
236 d.name = "Means of Coefficients";
|
c@52
|
237 d.description = "Mean values of MFCCs across duration of audio input";
|
c@45
|
238 d.sampleType = OutputDescriptor::FixedSampleRate;
|
c@45
|
239 d.sampleRate = 1;
|
c@45
|
240 list.push_back(d);
|
c@45
|
241
|
c@45
|
242 return list;
|
c@45
|
243 }
|
c@45
|
244
|
c@45
|
245 MFCCPlugin::FeatureSet
|
c@45
|
246 MFCCPlugin::process(const float *const *inputBuffers,
|
c@45
|
247 Vamp::RealTime /* timestamp */)
|
c@45
|
248 {
|
c@45
|
249 if (!m_mfcc) {
|
c@45
|
250 cerr << "ERROR: MFCCPlugin::process: "
|
c@45
|
251 << "MFCC has not been initialised"
|
c@45
|
252 << endl;
|
c@45
|
253 return FeatureSet();
|
c@45
|
254 }
|
c@45
|
255
|
c@45
|
256 double *real = new double[m_block];
|
c@45
|
257 double *imag = new double[m_block];
|
c@45
|
258
|
c@75
|
259 for (size_t i = 0; i <= m_block/2; ++i) {
|
c@45
|
260 real[i] = inputBuffers[0][i*2];
|
c@45
|
261 if (i > 0) real[m_block - i] = real[i];
|
c@45
|
262 imag[i] = inputBuffers[0][i*2+1];
|
c@45
|
263 if (i > 0) imag[m_block - i] = imag[i];
|
c@45
|
264 }
|
c@45
|
265
|
c@45
|
266 double *output = new double[m_bins];
|
c@45
|
267
|
c@45
|
268 m_mfcc->process(real, imag, output);
|
c@45
|
269
|
c@45
|
270 delete[] real;
|
c@45
|
271 delete[] imag;
|
c@45
|
272
|
c@45
|
273 Feature feature;
|
c@45
|
274 feature.hasTimestamp = false;
|
c@45
|
275 for (size_t i = 0; i < m_bins; ++i) {
|
c@45
|
276 double value = output[i];
|
c@45
|
277 if (isnan(value)) value = 0.0;
|
c@45
|
278 m_binsums[i] += value;
|
c@45
|
279 feature.values.push_back(value);
|
c@45
|
280 }
|
c@45
|
281 feature.label = "";
|
c@45
|
282 ++m_count;
|
c@45
|
283
|
c@45
|
284 FeatureSet returnFeatures;
|
c@45
|
285 returnFeatures[0].push_back(feature);
|
c@45
|
286 return returnFeatures;
|
c@45
|
287 }
|
c@45
|
288
|
c@45
|
289 MFCCPlugin::FeatureSet
|
c@45
|
290 MFCCPlugin::getRemainingFeatures()
|
c@45
|
291 {
|
c@45
|
292 Feature feature;
|
c@45
|
293 feature.hasTimestamp = true;
|
c@45
|
294 feature.timestamp = Vamp::RealTime::zeroTime;
|
c@45
|
295
|
c@45
|
296 for (size_t i = 0; i < m_bins; ++i) {
|
c@45
|
297 double v = m_binsums[i];
|
c@45
|
298 if (m_count > 0) v /= m_count;
|
c@45
|
299 feature.values.push_back(v);
|
c@45
|
300 }
|
c@45
|
301 feature.label = "Coefficient means";
|
c@45
|
302
|
c@45
|
303 FeatureSet returnFeatures;
|
c@45
|
304 returnFeatures[1].push_back(feature);
|
c@45
|
305 return returnFeatures;
|
c@45
|
306 }
|
c@45
|
307
|