c@45: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ c@45: c@45: /* c@45: QM Vamp Plugin Set c@45: c@45: Centre for Digital Music, Queen Mary, University of London. c@135: c@135: This program is free software; you can redistribute it and/or c@135: modify it under the terms of the GNU General Public License as c@135: published by the Free Software Foundation; either version 2 of the c@135: License, or (at your option) any later version. See the file c@135: COPYING included with this distribution for more information. c@45: */ c@45: c@45: #include "MFCCPlugin.h" c@45: c@45: #include c@130: #include c@54: c@45: using std::string; c@45: using std::vector; c@45: using std::cerr; c@45: using std::endl; c@45: c@45: MFCCPlugin::MFCCPlugin(float inputSampleRate) : c@45: Vamp::Plugin(inputSampleRate), c@45: m_config(lrintf(inputSampleRate)), c@45: m_mfcc(0), c@45: m_step(1024), c@60: m_block(2048), c@60: m_count(0) c@45: { c@45: m_bins = 20; c@45: m_wantC0 = true; c@45: m_logpower = 1; c@45: c@45: setupConfig(); c@45: } c@45: c@45: void c@45: MFCCPlugin::setupConfig() c@45: { c@45: m_config.FS = lrintf(m_inputSampleRate); c@45: m_config.fftsize = m_block; c@45: m_config.nceps = (m_wantC0 ? m_bins-1 : m_bins); c@45: m_config.want_c0 = m_wantC0; c@45: m_config.logpower = m_logpower; c@45: } c@45: c@45: MFCCPlugin::~MFCCPlugin() c@45: { c@45: delete m_mfcc; c@45: } c@45: c@45: string c@45: MFCCPlugin::getIdentifier() const c@45: { c@45: return "qm-mfcc"; c@45: } c@45: c@45: string c@45: MFCCPlugin::getName() const c@45: { c@45: return "Mel-Frequency Cepstral Coefficients"; c@45: } c@45: c@45: string c@45: MFCCPlugin::getDescription() const c@45: { c@50: return "Calculate a series of MFCC vectors from the audio"; c@45: } c@45: c@45: string c@45: MFCCPlugin::getMaker() const c@45: { c@45: return "Queen Mary, University of London"; c@45: } c@45: c@45: int c@45: MFCCPlugin::getPluginVersion() const c@45: { c@45: return 1; c@45: } c@45: c@45: string c@45: MFCCPlugin::getCopyright() const c@45: { c@118: return "Plugin by Nicolas Chetry and Chris Cannam. Copyright (c) 2009 QMUL - All Rights Reserved"; c@45: } c@45: c@45: MFCCPlugin::ParameterList c@45: MFCCPlugin::getParameterDescriptors() const c@45: { c@45: ParameterList list; c@45: c@45: ParameterDescriptor desc; c@45: desc.identifier = "nceps"; c@45: desc.name = "Number of Coefficients"; c@45: desc.unit = ""; c@52: desc.description = "Number of MFCCs to return, starting from C0 if \"Include C0\" is specified or from C1 otherwise"; c@45: desc.minValue = 1; c@45: desc.maxValue = 40; c@45: desc.defaultValue = 20; c@45: desc.isQuantized = true; c@45: desc.quantizeStep = 1; c@45: list.push_back(desc); c@45: c@45: desc.identifier = "logpower"; c@45: desc.name = "Power for Mel Amplitude Logs"; c@45: desc.unit = ""; c@52: desc.description = "Power to raise the amplitude log values to before applying DCT. Values greater than 1 may reduce contribution of noise"; c@45: desc.minValue = 0; c@45: desc.maxValue = 5; c@45: desc.defaultValue = 1; c@45: desc.isQuantized = false; c@45: desc.quantizeStep = 0; c@45: list.push_back(desc); c@45: c@45: desc.identifier = "wantc0"; c@45: desc.name = "Include C0"; c@45: desc.unit = ""; c@52: desc.description = "Whether to include the C0 (energy level) coefficient in the returned results"; c@45: desc.minValue = 0; c@45: desc.maxValue = 1; c@45: desc.defaultValue = 1; c@45: desc.isQuantized = true; c@45: desc.quantizeStep = 1; c@45: list.push_back(desc); c@45: c@45: return list; c@45: } c@45: c@45: float c@45: MFCCPlugin::getParameter(std::string param) const c@45: { c@45: if (param == "nceps") { c@45: return m_bins; c@45: } c@45: if (param == "logpower") { c@45: return m_logpower; c@45: } c@45: if (param == "wantc0") { c@45: return m_wantC0 ? 1 : 0; c@45: } c@45: std::cerr << "WARNING: MFCCPlugin::getParameter: unknown parameter \"" c@45: << param << "\"" << std::endl; c@45: return 0.0; c@45: } c@45: c@45: void c@45: MFCCPlugin::setParameter(std::string param, float value) c@45: { c@45: if (param == "nceps") { c@45: m_bins = lrintf(value); c@45: } else if (param == "logpower") { c@45: m_logpower = lrintf(value); c@45: } else if (param == "wantc0") { c@45: m_wantC0 = (value > 0.5); c@45: } else { c@45: std::cerr << "WARNING: MFCCPlugin::setParameter: unknown parameter \"" c@45: << param << "\"" << std::endl; c@45: } c@45: c@45: setupConfig(); c@45: } c@45: c@45: bool c@45: MFCCPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) c@45: { c@45: if (m_mfcc) { c@45: delete m_mfcc; c@45: m_mfcc = 0; c@45: } c@45: c@45: if (channels < getMinChannelCount() || c@45: channels > getMaxChannelCount()) return false; c@45: c@95: // std::cerr << "MFCCPlugin::initialise: step " << stepSize << ", block " c@95: // << blockSize << std::endl; c@45: c@45: m_step = stepSize; c@45: m_block = blockSize; c@45: setupConfig(); c@45: c@45: m_mfcc = new MFCC(m_config); c@45: c@45: m_binsums = vector(m_bins); c@45: for (int i = 0; i < m_bins; ++i) { c@45: m_binsums[i] = 0.0; c@45: } c@45: c@45: return true; c@45: } c@45: c@45: void c@45: MFCCPlugin::reset() c@45: { c@45: if (m_mfcc) { c@45: delete m_mfcc; c@45: m_mfcc = new MFCC(m_config); c@45: for (int i = 0; i < m_bins; ++i) { c@45: m_binsums[i] = 0.0; c@45: } c@45: } c@60: m_count = 0; c@45: } c@45: c@45: size_t c@45: MFCCPlugin::getPreferredStepSize() const c@45: { c@45: return 1024; c@45: } c@45: c@45: size_t c@45: MFCCPlugin::getPreferredBlockSize() const c@45: { c@45: return 2048; c@45: } c@45: c@45: MFCCPlugin::OutputList c@45: MFCCPlugin::getOutputDescriptors() const c@45: { c@45: OutputList list; c@45: c@45: OutputDescriptor d; c@45: d.identifier = "coefficients"; c@45: d.name = "Coefficients"; c@45: d.unit = ""; c@52: d.description = "MFCC values"; c@45: d.hasFixedBinCount = true; c@45: d.binCount = m_bins; c@45: d.hasKnownExtents = false; c@45: d.isQuantized = false; c@45: d.sampleType = OutputDescriptor::OneSamplePerStep; c@45: list.push_back(d); c@45: c@45: d.identifier = "means"; c@45: d.name = "Means of Coefficients"; c@52: d.description = "Mean values of MFCCs across duration of audio input"; c@45: d.sampleType = OutputDescriptor::FixedSampleRate; c@45: d.sampleRate = 1; c@45: list.push_back(d); c@45: c@45: return list; c@45: } c@45: c@45: MFCCPlugin::FeatureSet c@45: MFCCPlugin::process(const float *const *inputBuffers, c@45: Vamp::RealTime /* timestamp */) c@45: { c@45: if (!m_mfcc) { c@45: cerr << "ERROR: MFCCPlugin::process: " c@45: << "MFCC has not been initialised" c@45: << endl; c@45: return FeatureSet(); c@45: } c@45: c@45: double *real = new double[m_block]; c@45: double *imag = new double[m_block]; c@45: c@75: for (size_t i = 0; i <= m_block/2; ++i) { c@45: real[i] = inputBuffers[0][i*2]; c@45: if (i > 0) real[m_block - i] = real[i]; c@45: imag[i] = inputBuffers[0][i*2+1]; c@45: if (i > 0) imag[m_block - i] = imag[i]; c@45: } c@45: c@45: double *output = new double[m_bins]; c@45: c@45: m_mfcc->process(real, imag, output); c@45: c@45: delete[] real; c@45: delete[] imag; c@45: c@45: Feature feature; c@45: feature.hasTimestamp = false; c@178: for (int i = 0; i < m_bins; ++i) { c@45: double value = output[i]; c@130: if (ISNAN(value)) value = 0.0; c@45: m_binsums[i] += value; c@45: feature.values.push_back(value); c@45: } c@45: feature.label = ""; c@45: ++m_count; c@45: c@95: delete[] output; c@95: c@45: FeatureSet returnFeatures; c@45: returnFeatures[0].push_back(feature); c@45: return returnFeatures; c@45: } c@45: c@45: MFCCPlugin::FeatureSet c@45: MFCCPlugin::getRemainingFeatures() c@45: { c@45: Feature feature; c@45: feature.hasTimestamp = true; c@45: feature.timestamp = Vamp::RealTime::zeroTime; c@45: c@178: for (int i = 0; i < m_bins; ++i) { c@45: double v = m_binsums[i]; c@45: if (m_count > 0) v /= m_count; c@45: feature.values.push_back(v); c@45: } c@45: feature.label = "Coefficient means"; c@45: c@45: FeatureSet returnFeatures; c@45: returnFeatures[1].push_back(feature); c@45: return returnFeatures; c@45: } c@45: