c@45
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@45
|
2
|
c@45
|
3 /*
|
c@45
|
4 QM Vamp Plugin Set
|
c@45
|
5
|
c@45
|
6 Centre for Digital Music, Queen Mary, University of London.
|
c@45
|
7 All rights reserved.
|
c@45
|
8 */
|
c@45
|
9
|
c@45
|
10 #include "MFCCPlugin.h"
|
c@45
|
11
|
c@45
|
12 #include <dsp/mfcc/MFCC.h>
|
c@45
|
13
|
c@45
|
14 using std::string;
|
c@45
|
15 using std::vector;
|
c@45
|
16 using std::cerr;
|
c@45
|
17 using std::endl;
|
c@45
|
18
|
c@45
|
19 MFCCPlugin::MFCCPlugin(float inputSampleRate) :
|
c@45
|
20 Vamp::Plugin(inputSampleRate),
|
c@45
|
21 m_config(lrintf(inputSampleRate)),
|
c@45
|
22 m_mfcc(0),
|
c@45
|
23 m_step(1024),
|
c@45
|
24 m_block(2048)
|
c@45
|
25 {
|
c@45
|
26 m_bins = 20;
|
c@45
|
27 m_wantC0 = true;
|
c@45
|
28 m_logpower = 1;
|
c@45
|
29
|
c@45
|
30 setupConfig();
|
c@45
|
31 }
|
c@45
|
32
|
c@45
|
33 void
|
c@45
|
34 MFCCPlugin::setupConfig()
|
c@45
|
35 {
|
c@45
|
36 m_config.FS = lrintf(m_inputSampleRate);
|
c@45
|
37 m_config.fftsize = m_block;
|
c@45
|
38 m_config.nceps = (m_wantC0 ? m_bins-1 : m_bins);
|
c@45
|
39 m_config.want_c0 = m_wantC0;
|
c@45
|
40 m_config.logpower = m_logpower;
|
c@45
|
41 }
|
c@45
|
42
|
c@45
|
43 MFCCPlugin::~MFCCPlugin()
|
c@45
|
44 {
|
c@45
|
45 delete m_mfcc;
|
c@45
|
46 }
|
c@45
|
47
|
c@45
|
48 string
|
c@45
|
49 MFCCPlugin::getIdentifier() const
|
c@45
|
50 {
|
c@45
|
51 return "qm-mfcc";
|
c@45
|
52 }
|
c@45
|
53
|
c@45
|
54 string
|
c@45
|
55 MFCCPlugin::getName() const
|
c@45
|
56 {
|
c@45
|
57 return "Mel-Frequency Cepstral Coefficients";
|
c@45
|
58 }
|
c@45
|
59
|
c@45
|
60 string
|
c@45
|
61 MFCCPlugin::getDescription() const
|
c@45
|
62 {
|
c@50
|
63 return "Calculate a series of MFCC vectors from the audio";
|
c@45
|
64 }
|
c@45
|
65
|
c@45
|
66 string
|
c@45
|
67 MFCCPlugin::getMaker() const
|
c@45
|
68 {
|
c@45
|
69 return "Queen Mary, University of London";
|
c@45
|
70 }
|
c@45
|
71
|
c@45
|
72 int
|
c@45
|
73 MFCCPlugin::getPluginVersion() const
|
c@45
|
74 {
|
c@45
|
75 return 1;
|
c@45
|
76 }
|
c@45
|
77
|
c@45
|
78 string
|
c@45
|
79 MFCCPlugin::getCopyright() const
|
c@45
|
80 {
|
c@50
|
81 return "Plugin by Nicolas Chetry and Chris Cannam. Copyright (c) 2008 QMUL - All Rights Reserved";
|
c@45
|
82 }
|
c@45
|
83
|
c@45
|
84 MFCCPlugin::ParameterList
|
c@45
|
85 MFCCPlugin::getParameterDescriptors() const
|
c@45
|
86 {
|
c@45
|
87 ParameterList list;
|
c@45
|
88
|
c@45
|
89 ParameterDescriptor desc;
|
c@45
|
90 desc.identifier = "nceps";
|
c@45
|
91 desc.name = "Number of Coefficients";
|
c@45
|
92 desc.unit = "";
|
c@52
|
93 desc.description = "Number of MFCCs to return, starting from C0 if \"Include C0\" is specified or from C1 otherwise";
|
c@45
|
94 desc.minValue = 1;
|
c@45
|
95 desc.maxValue = 40;
|
c@45
|
96 desc.defaultValue = 20;
|
c@45
|
97 desc.isQuantized = true;
|
c@45
|
98 desc.quantizeStep = 1;
|
c@45
|
99 list.push_back(desc);
|
c@45
|
100
|
c@45
|
101 desc.identifier = "logpower";
|
c@45
|
102 desc.name = "Power for Mel Amplitude Logs";
|
c@45
|
103 desc.unit = "";
|
c@52
|
104 desc.description = "Power to raise the amplitude log values to before applying DCT. Values greater than 1 may reduce contribution of noise";
|
c@45
|
105 desc.minValue = 0;
|
c@45
|
106 desc.maxValue = 5;
|
c@45
|
107 desc.defaultValue = 1;
|
c@45
|
108 desc.isQuantized = false;
|
c@45
|
109 desc.quantizeStep = 0;
|
c@45
|
110 list.push_back(desc);
|
c@45
|
111
|
c@45
|
112 desc.identifier = "wantc0";
|
c@45
|
113 desc.name = "Include C0";
|
c@45
|
114 desc.unit = "";
|
c@52
|
115 desc.description = "Whether to include the C0 (energy level) coefficient in the returned results";
|
c@45
|
116 desc.minValue = 0;
|
c@45
|
117 desc.maxValue = 1;
|
c@45
|
118 desc.defaultValue = 1;
|
c@45
|
119 desc.isQuantized = true;
|
c@45
|
120 desc.quantizeStep = 1;
|
c@45
|
121 list.push_back(desc);
|
c@45
|
122
|
c@45
|
123 return list;
|
c@45
|
124 }
|
c@45
|
125
|
c@45
|
126 float
|
c@45
|
127 MFCCPlugin::getParameter(std::string param) const
|
c@45
|
128 {
|
c@45
|
129 if (param == "nceps") {
|
c@45
|
130 return m_bins;
|
c@45
|
131 }
|
c@45
|
132 if (param == "logpower") {
|
c@45
|
133 return m_logpower;
|
c@45
|
134 }
|
c@45
|
135 if (param == "wantc0") {
|
c@45
|
136 return m_wantC0 ? 1 : 0;
|
c@45
|
137 }
|
c@45
|
138 std::cerr << "WARNING: MFCCPlugin::getParameter: unknown parameter \""
|
c@45
|
139 << param << "\"" << std::endl;
|
c@45
|
140 return 0.0;
|
c@45
|
141 }
|
c@45
|
142
|
c@45
|
143 void
|
c@45
|
144 MFCCPlugin::setParameter(std::string param, float value)
|
c@45
|
145 {
|
c@45
|
146 if (param == "nceps") {
|
c@45
|
147 m_bins = lrintf(value);
|
c@45
|
148 } else if (param == "logpower") {
|
c@45
|
149 m_logpower = lrintf(value);
|
c@45
|
150 } else if (param == "wantc0") {
|
c@45
|
151 m_wantC0 = (value > 0.5);
|
c@45
|
152 } else {
|
c@45
|
153 std::cerr << "WARNING: MFCCPlugin::setParameter: unknown parameter \""
|
c@45
|
154 << param << "\"" << std::endl;
|
c@45
|
155 }
|
c@45
|
156
|
c@45
|
157 setupConfig();
|
c@45
|
158 }
|
c@45
|
159
|
c@45
|
160 bool
|
c@45
|
161 MFCCPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
c@45
|
162 {
|
c@45
|
163 if (m_mfcc) {
|
c@45
|
164 delete m_mfcc;
|
c@45
|
165 m_mfcc = 0;
|
c@45
|
166 }
|
c@45
|
167
|
c@45
|
168 if (channels < getMinChannelCount() ||
|
c@45
|
169 channels > getMaxChannelCount()) return false;
|
c@45
|
170
|
c@45
|
171 std::cerr << "MFCCPlugin::initialise: step " << stepSize << ", block "
|
c@45
|
172 << blockSize << std::endl;
|
c@45
|
173
|
c@45
|
174 m_step = stepSize;
|
c@45
|
175 m_block = blockSize;
|
c@45
|
176 setupConfig();
|
c@45
|
177
|
c@45
|
178 m_mfcc = new MFCC(m_config);
|
c@45
|
179
|
c@45
|
180 m_binsums = vector<double>(m_bins);
|
c@45
|
181 for (int i = 0; i < m_bins; ++i) {
|
c@45
|
182 m_binsums[i] = 0.0;
|
c@45
|
183 }
|
c@45
|
184
|
c@45
|
185 return true;
|
c@45
|
186 }
|
c@45
|
187
|
c@45
|
188 void
|
c@45
|
189 MFCCPlugin::reset()
|
c@45
|
190 {
|
c@45
|
191 if (m_mfcc) {
|
c@45
|
192 delete m_mfcc;
|
c@45
|
193 m_mfcc = new MFCC(m_config);
|
c@45
|
194 for (int i = 0; i < m_bins; ++i) {
|
c@45
|
195 m_binsums[i] = 0.0;
|
c@45
|
196 }
|
c@45
|
197 }
|
c@45
|
198 }
|
c@45
|
199
|
c@45
|
200 size_t
|
c@45
|
201 MFCCPlugin::getPreferredStepSize() const
|
c@45
|
202 {
|
c@45
|
203 return 1024;
|
c@45
|
204 }
|
c@45
|
205
|
c@45
|
206 size_t
|
c@45
|
207 MFCCPlugin::getPreferredBlockSize() const
|
c@45
|
208 {
|
c@45
|
209 return 2048;
|
c@45
|
210 }
|
c@45
|
211
|
c@45
|
212 MFCCPlugin::OutputList
|
c@45
|
213 MFCCPlugin::getOutputDescriptors() const
|
c@45
|
214 {
|
c@45
|
215 OutputList list;
|
c@45
|
216
|
c@45
|
217 OutputDescriptor d;
|
c@45
|
218 d.identifier = "coefficients";
|
c@45
|
219 d.name = "Coefficients";
|
c@45
|
220 d.unit = "";
|
c@52
|
221 d.description = "MFCC values";
|
c@45
|
222 d.hasFixedBinCount = true;
|
c@45
|
223 d.binCount = m_bins;
|
c@45
|
224 d.hasKnownExtents = false;
|
c@45
|
225 d.isQuantized = false;
|
c@45
|
226 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
c@45
|
227 list.push_back(d);
|
c@45
|
228
|
c@45
|
229 d.identifier = "means";
|
c@45
|
230 d.name = "Means of Coefficients";
|
c@52
|
231 d.description = "Mean values of MFCCs across duration of audio input";
|
c@45
|
232 d.sampleType = OutputDescriptor::FixedSampleRate;
|
c@45
|
233 d.sampleRate = 1;
|
c@45
|
234 list.push_back(d);
|
c@45
|
235
|
c@45
|
236 return list;
|
c@45
|
237 }
|
c@45
|
238
|
c@45
|
239 MFCCPlugin::FeatureSet
|
c@45
|
240 MFCCPlugin::process(const float *const *inputBuffers,
|
c@45
|
241 Vamp::RealTime /* timestamp */)
|
c@45
|
242 {
|
c@45
|
243 if (!m_mfcc) {
|
c@45
|
244 cerr << "ERROR: MFCCPlugin::process: "
|
c@45
|
245 << "MFCC has not been initialised"
|
c@45
|
246 << endl;
|
c@45
|
247 return FeatureSet();
|
c@45
|
248 }
|
c@45
|
249
|
c@45
|
250 double *real = new double[m_block];
|
c@45
|
251 double *imag = new double[m_block];
|
c@45
|
252
|
c@45
|
253 for (size_t i = 0; i < m_block/2; ++i) {
|
c@45
|
254 real[i] = inputBuffers[0][i*2];
|
c@45
|
255 if (i > 0) real[m_block - i] = real[i];
|
c@45
|
256 imag[i] = inputBuffers[0][i*2+1];
|
c@45
|
257 if (i > 0) imag[m_block - i] = imag[i];
|
c@45
|
258 }
|
c@45
|
259
|
c@45
|
260 double *output = new double[m_bins];
|
c@45
|
261
|
c@45
|
262 m_mfcc->process(real, imag, output);
|
c@45
|
263
|
c@45
|
264 delete[] real;
|
c@45
|
265 delete[] imag;
|
c@45
|
266
|
c@45
|
267 Feature feature;
|
c@45
|
268 feature.hasTimestamp = false;
|
c@45
|
269 for (size_t i = 0; i < m_bins; ++i) {
|
c@45
|
270 double value = output[i];
|
c@45
|
271 if (isnan(value)) value = 0.0;
|
c@45
|
272 m_binsums[i] += value;
|
c@45
|
273 feature.values.push_back(value);
|
c@45
|
274 }
|
c@45
|
275 feature.label = "";
|
c@45
|
276 ++m_count;
|
c@45
|
277
|
c@45
|
278 FeatureSet returnFeatures;
|
c@45
|
279 returnFeatures[0].push_back(feature);
|
c@45
|
280 return returnFeatures;
|
c@45
|
281 }
|
c@45
|
282
|
c@45
|
283 MFCCPlugin::FeatureSet
|
c@45
|
284 MFCCPlugin::getRemainingFeatures()
|
c@45
|
285 {
|
c@45
|
286 Feature feature;
|
c@45
|
287 feature.hasTimestamp = true;
|
c@45
|
288 feature.timestamp = Vamp::RealTime::zeroTime;
|
c@45
|
289
|
c@45
|
290 for (size_t i = 0; i < m_bins; ++i) {
|
c@45
|
291 double v = m_binsums[i];
|
c@45
|
292 if (m_count > 0) v /= m_count;
|
c@45
|
293 feature.values.push_back(v);
|
c@45
|
294 }
|
c@45
|
295 feature.label = "Coefficient means";
|
c@45
|
296
|
c@45
|
297 FeatureSet returnFeatures;
|
c@45
|
298 returnFeatures[1].push_back(feature);
|
c@45
|
299 return returnFeatures;
|
c@45
|
300 }
|
c@45
|
301
|