annotate plugins/MFCCPlugin.cpp @ 266:d04675d44928 tip master

Refer to SDK from Github
author Chris Cannam <cannam@all-day-breakfast.com>
date Wed, 02 Jun 2021 14:41:26 +0100
parents f96ea0e4b475
children
rev   line source
c@45 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@45 2
c@45 3 /*
c@45 4 QM Vamp Plugin Set
c@45 5
c@45 6 Centre for Digital Music, Queen Mary, University of London.
c@135 7
c@135 8 This program is free software; you can redistribute it and/or
c@135 9 modify it under the terms of the GNU General Public License as
c@135 10 published by the Free Software Foundation; either version 2 of the
c@135 11 License, or (at your option) any later version. See the file
c@135 12 COPYING included with this distribution for more information.
c@45 13 */
c@45 14
c@45 15 #include "MFCCPlugin.h"
c@45 16
c@45 17 #include <dsp/mfcc/MFCC.h>
c@130 18 #include <maths/MathUtilities.h>
c@54 19
c@45 20 using std::string;
c@45 21 using std::vector;
c@45 22 using std::cerr;
c@45 23 using std::endl;
c@45 24
c@45 25 MFCCPlugin::MFCCPlugin(float inputSampleRate) :
c@45 26 Vamp::Plugin(inputSampleRate),
c@45 27 m_config(lrintf(inputSampleRate)),
c@45 28 m_mfcc(0),
c@45 29 m_step(1024),
c@60 30 m_block(2048),
c@60 31 m_count(0)
c@45 32 {
c@45 33 m_bins = 20;
c@45 34 m_wantC0 = true;
c@45 35 m_logpower = 1;
c@45 36
c@45 37 setupConfig();
c@45 38 }
c@45 39
c@45 40 void
c@45 41 MFCCPlugin::setupConfig()
c@45 42 {
c@45 43 m_config.FS = lrintf(m_inputSampleRate);
c@45 44 m_config.fftsize = m_block;
c@45 45 m_config.nceps = (m_wantC0 ? m_bins-1 : m_bins);
c@45 46 m_config.want_c0 = m_wantC0;
c@45 47 m_config.logpower = m_logpower;
c@45 48 }
c@45 49
c@45 50 MFCCPlugin::~MFCCPlugin()
c@45 51 {
c@45 52 delete m_mfcc;
c@45 53 }
c@45 54
c@45 55 string
c@45 56 MFCCPlugin::getIdentifier() const
c@45 57 {
c@45 58 return "qm-mfcc";
c@45 59 }
c@45 60
c@45 61 string
c@45 62 MFCCPlugin::getName() const
c@45 63 {
c@45 64 return "Mel-Frequency Cepstral Coefficients";
c@45 65 }
c@45 66
c@45 67 string
c@45 68 MFCCPlugin::getDescription() const
c@45 69 {
c@50 70 return "Calculate a series of MFCC vectors from the audio";
c@45 71 }
c@45 72
c@45 73 string
c@45 74 MFCCPlugin::getMaker() const
c@45 75 {
c@45 76 return "Queen Mary, University of London";
c@45 77 }
c@45 78
c@45 79 int
c@45 80 MFCCPlugin::getPluginVersion() const
c@45 81 {
c@45 82 return 1;
c@45 83 }
c@45 84
c@45 85 string
c@45 86 MFCCPlugin::getCopyright() const
c@45 87 {
c@118 88 return "Plugin by Nicolas Chetry and Chris Cannam. Copyright (c) 2009 QMUL - All Rights Reserved";
c@45 89 }
c@45 90
c@45 91 MFCCPlugin::ParameterList
c@45 92 MFCCPlugin::getParameterDescriptors() const
c@45 93 {
c@45 94 ParameterList list;
c@45 95
c@45 96 ParameterDescriptor desc;
c@45 97 desc.identifier = "nceps";
c@45 98 desc.name = "Number of Coefficients";
c@45 99 desc.unit = "";
c@52 100 desc.description = "Number of MFCCs to return, starting from C0 if \"Include C0\" is specified or from C1 otherwise";
c@45 101 desc.minValue = 1;
c@45 102 desc.maxValue = 40;
c@45 103 desc.defaultValue = 20;
c@45 104 desc.isQuantized = true;
c@45 105 desc.quantizeStep = 1;
c@45 106 list.push_back(desc);
c@45 107
c@45 108 desc.identifier = "logpower";
c@45 109 desc.name = "Power for Mel Amplitude Logs";
c@45 110 desc.unit = "";
c@52 111 desc.description = "Power to raise the amplitude log values to before applying DCT. Values greater than 1 may reduce contribution of noise";
c@45 112 desc.minValue = 0;
c@45 113 desc.maxValue = 5;
c@45 114 desc.defaultValue = 1;
c@45 115 desc.isQuantized = false;
c@45 116 desc.quantizeStep = 0;
c@45 117 list.push_back(desc);
c@45 118
c@45 119 desc.identifier = "wantc0";
c@45 120 desc.name = "Include C0";
c@45 121 desc.unit = "";
c@52 122 desc.description = "Whether to include the C0 (energy level) coefficient in the returned results";
c@45 123 desc.minValue = 0;
c@45 124 desc.maxValue = 1;
c@45 125 desc.defaultValue = 1;
c@45 126 desc.isQuantized = true;
c@45 127 desc.quantizeStep = 1;
c@45 128 list.push_back(desc);
c@45 129
c@45 130 return list;
c@45 131 }
c@45 132
c@45 133 float
c@45 134 MFCCPlugin::getParameter(std::string param) const
c@45 135 {
c@45 136 if (param == "nceps") {
c@45 137 return m_bins;
c@45 138 }
c@45 139 if (param == "logpower") {
c@45 140 return m_logpower;
c@45 141 }
c@45 142 if (param == "wantc0") {
c@45 143 return m_wantC0 ? 1 : 0;
c@45 144 }
c@45 145 std::cerr << "WARNING: MFCCPlugin::getParameter: unknown parameter \""
c@45 146 << param << "\"" << std::endl;
c@45 147 return 0.0;
c@45 148 }
c@45 149
c@45 150 void
c@45 151 MFCCPlugin::setParameter(std::string param, float value)
c@45 152 {
c@45 153 if (param == "nceps") {
c@45 154 m_bins = lrintf(value);
c@45 155 } else if (param == "logpower") {
c@45 156 m_logpower = lrintf(value);
c@45 157 } else if (param == "wantc0") {
c@45 158 m_wantC0 = (value > 0.5);
c@45 159 } else {
c@45 160 std::cerr << "WARNING: MFCCPlugin::setParameter: unknown parameter \""
c@45 161 << param << "\"" << std::endl;
c@45 162 }
c@45 163
c@45 164 setupConfig();
c@45 165 }
c@45 166
c@45 167 bool
c@45 168 MFCCPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
c@45 169 {
c@45 170 if (m_mfcc) {
c@45 171 delete m_mfcc;
c@45 172 m_mfcc = 0;
c@45 173 }
c@45 174
c@45 175 if (channels < getMinChannelCount() ||
c@45 176 channels > getMaxChannelCount()) return false;
c@45 177
c@95 178 // std::cerr << "MFCCPlugin::initialise: step " << stepSize << ", block "
c@95 179 // << blockSize << std::endl;
c@45 180
c@45 181 m_step = stepSize;
c@45 182 m_block = blockSize;
c@45 183 setupConfig();
c@45 184
c@45 185 m_mfcc = new MFCC(m_config);
c@45 186
c@45 187 m_binsums = vector<double>(m_bins);
c@45 188 for (int i = 0; i < m_bins; ++i) {
c@45 189 m_binsums[i] = 0.0;
c@45 190 }
c@45 191
c@45 192 return true;
c@45 193 }
c@45 194
c@45 195 void
c@45 196 MFCCPlugin::reset()
c@45 197 {
c@45 198 if (m_mfcc) {
c@45 199 delete m_mfcc;
c@45 200 m_mfcc = new MFCC(m_config);
c@45 201 for (int i = 0; i < m_bins; ++i) {
c@45 202 m_binsums[i] = 0.0;
c@45 203 }
c@45 204 }
c@60 205 m_count = 0;
c@45 206 }
c@45 207
c@45 208 size_t
c@45 209 MFCCPlugin::getPreferredStepSize() const
c@45 210 {
c@45 211 return 1024;
c@45 212 }
c@45 213
c@45 214 size_t
c@45 215 MFCCPlugin::getPreferredBlockSize() const
c@45 216 {
c@45 217 return 2048;
c@45 218 }
c@45 219
c@45 220 MFCCPlugin::OutputList
c@45 221 MFCCPlugin::getOutputDescriptors() const
c@45 222 {
c@45 223 OutputList list;
c@45 224
c@45 225 OutputDescriptor d;
c@45 226 d.identifier = "coefficients";
c@45 227 d.name = "Coefficients";
c@45 228 d.unit = "";
c@52 229 d.description = "MFCC values";
c@45 230 d.hasFixedBinCount = true;
c@45 231 d.binCount = m_bins;
c@45 232 d.hasKnownExtents = false;
c@45 233 d.isQuantized = false;
c@45 234 d.sampleType = OutputDescriptor::OneSamplePerStep;
c@45 235 list.push_back(d);
c@45 236
c@45 237 d.identifier = "means";
c@45 238 d.name = "Means of Coefficients";
c@52 239 d.description = "Mean values of MFCCs across duration of audio input";
c@45 240 d.sampleType = OutputDescriptor::FixedSampleRate;
c@45 241 d.sampleRate = 1;
c@45 242 list.push_back(d);
c@45 243
c@45 244 return list;
c@45 245 }
c@45 246
c@45 247 MFCCPlugin::FeatureSet
c@45 248 MFCCPlugin::process(const float *const *inputBuffers,
c@45 249 Vamp::RealTime /* timestamp */)
c@45 250 {
c@45 251 if (!m_mfcc) {
c@45 252 cerr << "ERROR: MFCCPlugin::process: "
c@45 253 << "MFCC has not been initialised"
c@45 254 << endl;
c@45 255 return FeatureSet();
c@45 256 }
c@45 257
c@45 258 double *real = new double[m_block];
c@45 259 double *imag = new double[m_block];
c@45 260
c@75 261 for (size_t i = 0; i <= m_block/2; ++i) {
c@45 262 real[i] = inputBuffers[0][i*2];
c@45 263 if (i > 0) real[m_block - i] = real[i];
c@45 264 imag[i] = inputBuffers[0][i*2+1];
c@45 265 if (i > 0) imag[m_block - i] = imag[i];
c@45 266 }
c@45 267
c@45 268 double *output = new double[m_bins];
c@45 269
c@45 270 m_mfcc->process(real, imag, output);
c@45 271
c@45 272 delete[] real;
c@45 273 delete[] imag;
c@45 274
c@45 275 Feature feature;
c@45 276 feature.hasTimestamp = false;
c@178 277 for (int i = 0; i < m_bins; ++i) {
c@45 278 double value = output[i];
c@130 279 if (ISNAN(value)) value = 0.0;
c@45 280 m_binsums[i] += value;
c@45 281 feature.values.push_back(value);
c@45 282 }
c@45 283 feature.label = "";
c@45 284 ++m_count;
c@45 285
c@95 286 delete[] output;
c@95 287
c@45 288 FeatureSet returnFeatures;
c@45 289 returnFeatures[0].push_back(feature);
c@45 290 return returnFeatures;
c@45 291 }
c@45 292
c@45 293 MFCCPlugin::FeatureSet
c@45 294 MFCCPlugin::getRemainingFeatures()
c@45 295 {
c@45 296 Feature feature;
c@45 297 feature.hasTimestamp = true;
c@45 298 feature.timestamp = Vamp::RealTime::zeroTime;
c@45 299
c@178 300 for (int i = 0; i < m_bins; ++i) {
c@45 301 double v = m_binsums[i];
c@45 302 if (m_count > 0) v /= m_count;
c@45 303 feature.values.push_back(v);
c@45 304 }
c@45 305 feature.label = "Coefficient means";
c@45 306
c@45 307 FeatureSet returnFeatures;
c@45 308 returnFeatures[1].push_back(feature);
c@45 309 return returnFeatures;
c@45 310 }
c@45 311