cannam@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ cannam@0: cannam@0: /* cannam@0: Vamp feature extraction plugins using Jamie Bullock's cannam@0: libxtract audio feature extraction library. cannam@0: cannam@0: Centre for Digital Music, Queen Mary, University of London. Chris@28: This file copyright 2006-2012 Queen Mary, University of London. cannam@0: cannam@0: This program is free software; you can redistribute it and/or cannam@0: modify it under the terms of the GNU General Public License as cannam@0: published by the Free Software Foundation; either version 2 of the cannam@0: License, or (at your option) any later version. See the file cannam@0: COPYING included with this distribution for more information. cannam@0: */ cannam@0: cannam@0: #include "XTractPlugin.h" cannam@0: cannam@0: #include Chris@22: #include cannam@1: #include Chris@35: #include cannam@0: cannam@0: using std::cerr; cannam@0: using std::endl; cannam@0: using std::string; cannam@0: cannam@1: xtract_function_descriptor_t * cannam@1: XTractPlugin::m_xtDescriptors = 0; cannam@1: cannam@1: int cannam@1: XTractPlugin::m_xtDescRefCount = 0; cannam@1: cannam@0: XTractPlugin::XTractPlugin(unsigned int xtFeature, float inputSampleRate) : cannam@0: Plugin(inputSampleRate), cannam@0: m_xtFeature(xtFeature), cannam@0: m_channels(0), cannam@0: m_stepSize(0), cannam@0: m_blockSize(0), cannam@0: m_resultBuffer(0), cannam@1: m_peakThreshold(10), cannam@1: m_rolloffThreshold(90), cannam@1: m_harmonicThreshold(.1), cannam@0: m_minFreq(80), cannam@0: m_maxFreq(18000), cannam@9: m_coeffCount(40), cannam@9: m_highestCoef(20), cannam@9: m_lowestCoef(0), cannam@0: m_mfccFilters(0), cannam@1: m_mfccStyle((int)XTRACT_EQUAL_GAIN), cannam@14: m_spectrumType((int)XTRACT_MAGNITUDE_SPECTRUM), cannam@14: m_dc(0), cannam@14: m_normalise(0), cannam@0: m_barkBandLimits(0), cannam@0: m_outputBinCount(0), cannam@0: m_initialised(false) cannam@0: { cannam@1: if (m_xtDescRefCount++ == 0) { cannam@1: m_xtDescriptors = cannam@1: (xtract_function_descriptor_t *)xtract_make_descriptors(); cannam@1: } cannam@0: } cannam@0: cannam@0: XTractPlugin::~XTractPlugin() cannam@0: { cannam@0: if (m_mfccFilters) { cannam@0: for (size_t i = 0; i < m_coeffCount; ++i) { cannam@0: delete[] m_mfccFilters[i]; cannam@0: } cannam@0: delete[] m_mfccFilters; cannam@0: } cannam@0: if (m_barkBandLimits) { cannam@0: delete[] m_barkBandLimits; cannam@0: } cannam@0: if (m_resultBuffer) { cannam@0: delete[] m_resultBuffer; cannam@0: } cannam@1: cannam@1: if (--m_xtDescRefCount == 0) { cannam@1: xtract_free_descriptors(m_xtDescriptors); cannam@1: } cannam@0: } cannam@0: cannam@0: string cannam@2: XTractPlugin::getIdentifier() const cannam@0: { cannam@1: return xtDescriptor()->algo.name; cannam@0: } cannam@0: cannam@0: string cannam@2: XTractPlugin::getName() const cannam@2: { cannam@2: return xtDescriptor()->algo.p_name; cannam@2: } cannam@2: cannam@2: string cannam@0: XTractPlugin::getDescription() const cannam@0: { cannam@2: return xtDescriptor()->algo.p_desc; cannam@0: } cannam@1: cannam@0: cannam@0: string cannam@0: XTractPlugin::getMaker() const cannam@0: { cannam@0: return "libxtract by Jamie Bullock (plugin by Chris Cannam)"; cannam@0: } cannam@0: cannam@0: int cannam@0: XTractPlugin::getPluginVersion() const cannam@0: { Chris@28: return 4; cannam@0: } cannam@0: cannam@0: string cannam@0: XTractPlugin::getCopyright() const cannam@0: { Chris@28: string text = "Copyright 2006-2012 Jamie Bullock, plugin Copyright 2006-2012 Queen Mary, University of London. "; cannam@0: cannam@1: string method = ""; cannam@0: cannam@1: method += xtDescriptor()->algo.author; cannam@0: cannam@9: if (method != "") { cannam@9: int year = xtDescriptor()->algo.year; cannam@9: if (year != 0) { cannam@9: char yearstr[12]; cannam@9: sprintf(yearstr, " (%d)", year); cannam@9: method += yearstr; cannam@9: } cannam@9: text += "Method from " + method + ". "; cannam@9: } cannam@9: cannam@0: text += "Distributed under the GNU General Public License"; cannam@0: return text; cannam@0: } cannam@0: cannam@0: XTractPlugin::InputDomain cannam@0: XTractPlugin::getInputDomain() const cannam@0: { cannam@1: cannam@1: if (xtDescriptor()->data.format == XTRACT_AUDIO_SAMPLES) cannam@1: return TimeDomain; cannam@1: else cannam@1: return FrequencyDomain; cannam@0: } cannam@1: cannam@1: cannam@9: bool XTractPlugin::m_anyInitialised = false; cannam@0: cannam@0: bool cannam@0: XTractPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) cannam@0: { cannam@1: cannam@1: int donor = *(xtDescriptor()->argv.donor), cannam@1: data_format = xtDescriptor()->data.format; cannam@1: cannam@0: if (channels < getMinChannelCount() || cannam@0: channels > getMaxChannelCount()) return false; cannam@0: cannam@9: if (blockSize != getPreferredBlockSize()) { cannam@9: cerr << "XTractPlugin::initialise: ERROR: " cannam@9: << "Only the standard block size of " << getPreferredBlockSize() cannam@9: << " is supported (owing to global FFT initialisation requirements)" << endl; cannam@9: return false; cannam@9: } cannam@9: cannam@0: m_channels = channels; cannam@0: m_stepSize = stepSize; cannam@0: m_blockSize = blockSize; cannam@0: cannam@9: if (!m_anyInitialised) { cannam@9: m_anyInitialised = true; cannam@9: // initialise libxtract cannam@9: xtract_init_fft(m_blockSize, XTRACT_SPECTRUM); cannam@9: xtract_init_fft(m_blockSize, XTRACT_AUTOCORRELATION_FFT); cannam@9: xtract_init_fft(m_blockSize, XTRACT_DCT); cannam@9: xtract_init_fft(m_blockSize, XTRACT_MFCC); cannam@9: } cannam@9: cannam@1: if (donor == XTRACT_INIT_MFCC) { cannam@0: Chris@34: m_mfccFilters = new double *[m_coeffCount]; cannam@0: for (size_t i = 0; i < m_coeffCount; ++i) { Chris@34: m_mfccFilters[i] = new double[m_blockSize]; cannam@0: } cannam@0: cannam@0: int error = (int)xtract_init_mfcc(m_blockSize, m_inputSampleRate/2, cannam@0: m_mfccStyle, m_minFreq, m_maxFreq, cannam@0: m_coeffCount, m_mfccFilters); cannam@1: if (error != XTRACT_SUCCESS) { cannam@0: cerr << "XTractPlugin::initialise: ERROR: " cannam@0: << "xtract_init_mfcc returned error code " << error << endl; cannam@0: return false; cannam@0: } cannam@0: cannam@1: } else if (donor == XTRACT_BARK_COEFFICIENTS || cannam@7: donor == XTRACT_INIT_BARK || cannam@1: data_format == XTRACT_BARK_COEFFS) { cannam@7: cannam@1: m_barkBandLimits = new int[XTRACT_BARK_BANDS]; cannam@0: cannam@1: /*int error = *(int)*/xtract_init_bark(m_blockSize, m_inputSampleRate, cannam@0: m_barkBandLimits); cannam@0: // if (error != SUCCESS) { cannam@0: // cerr << "XTractPlugin::initialise: ERROR: " cannam@0: // << "xtract_init_bark returned error code " << error << endl; cannam@0: // return false; cannam@0: // } cannam@0: } cannam@0: cannam@0: switch (m_xtFeature) { cannam@1: case XTRACT_SPECTRUM: cannam@14: m_outputBinCount = m_blockSize / 2 + (m_dc ? 1 : 0); break; cannam@1: case XTRACT_HARMONIC_SPECTRUM: cannam@1: case XTRACT_PEAK_SPECTRUM: cannam@1: m_outputBinCount = m_blockSize / 2; break; cannam@1: case XTRACT_DCT: cannam@1: case XTRACT_AUTOCORRELATION_FFT: cannam@1: case XTRACT_AUTOCORRELATION: cannam@1: case XTRACT_AMDF: cannam@1: case XTRACT_ASDF: cannam@1: m_outputBinCount = m_blockSize; break; cannam@1: case XTRACT_MFCC: cannam@9: m_outputBinCount = (m_highestCoef - m_lowestCoef)+1; break; cannam@1: case XTRACT_BARK_COEFFICIENTS: cannam@1: m_outputBinCount = XTRACT_BARK_BANDS; break; cannam@1: default: cannam@1: m_outputBinCount = 1; break; cannam@0: } cannam@0: cannam@13: m_outputDescriptors.clear(); cannam@0: setupOutputDescriptors(); cannam@0: cannam@0: m_initialised = true; cannam@0: cannam@0: return true; cannam@0: } cannam@0: cannam@0: void cannam@0: XTractPlugin::reset() cannam@0: { cannam@0: } cannam@0: cannam@0: size_t cannam@0: XTractPlugin::getMinChannelCount() const cannam@0: { cannam@0: return 1; cannam@0: } cannam@0: cannam@0: size_t cannam@0: XTractPlugin::getMaxChannelCount() const cannam@0: { cannam@0: return 1; cannam@0: } cannam@0: cannam@0: size_t cannam@0: XTractPlugin::getPreferredStepSize() const cannam@0: { cannam@0: if (getInputDomain() == FrequencyDomain) { cannam@1: return getPreferredBlockSize(); cannam@1: } else { cannam@0: return getPreferredBlockSize() / 2; cannam@0: } cannam@0: } cannam@0: cannam@0: size_t cannam@0: XTractPlugin::getPreferredBlockSize() const cannam@0: { cannam@0: return 1024; cannam@0: } cannam@0: cannam@0: XTractPlugin::ParameterList cannam@0: XTractPlugin::getParameterDescriptors() const cannam@0: { cannam@0: ParameterList list; cannam@0: ParameterDescriptor desc; cannam@0: cannam@1: if (m_xtFeature == XTRACT_MFCC) { cannam@0: cannam@2: desc.identifier = "minfreq"; cannam@2: desc.name = "Minimum Frequency"; cannam@0: desc.minValue = 0; cannam@0: desc.maxValue = m_inputSampleRate / 2; cannam@0: desc.defaultValue = 80; cannam@0: desc.isQuantized = false; cannam@0: desc.unit = "Hz"; cannam@0: list.push_back(desc); cannam@0: cannam@2: desc.identifier = "maxfreq"; cannam@2: desc.name = "Maximum Frequency"; cannam@0: desc.defaultValue = 18000; cannam@0: if (desc.defaultValue > m_inputSampleRate * 0.875) { cannam@0: desc.defaultValue = m_inputSampleRate * 0.875; cannam@0: } cannam@0: list.push_back(desc); cannam@0: cannam@2: desc.identifier = "bands"; cannam@9: desc.name = "# Mel Frequency Bands"; cannam@0: desc.minValue = 10; cannam@9: desc.maxValue = 80; cannam@9: desc.defaultValue = 40; cannam@9: desc.unit = ""; cannam@9: desc.isQuantized = true; cannam@9: desc.quantizeStep = 1; cannam@9: list.push_back(desc); cannam@9: cannam@9: desc.identifier = "lowestcoef"; cannam@9: desc.name = "Lowest Coefficient Returned"; cannam@9: desc.minValue = 0; cannam@9: desc.maxValue = 80; cannam@9: desc.defaultValue = 0; cannam@9: desc.unit = ""; cannam@9: desc.isQuantized = true; cannam@9: desc.quantizeStep = 1; cannam@9: list.push_back(desc); cannam@9: cannam@9: desc.identifier = "highestcoef"; cannam@9: desc.name = "Highest Coefficient Returned"; cannam@9: desc.minValue = 0; cannam@9: desc.maxValue = 80; cannam@0: desc.defaultValue = 20; cannam@0: desc.unit = ""; cannam@0: desc.isQuantized = true; cannam@0: desc.quantizeStep = 1; cannam@0: list.push_back(desc); cannam@0: cannam@2: desc.identifier = "style"; cannam@2: desc.name = "MFCC Type"; cannam@0: desc.minValue = 0; cannam@0: desc.maxValue = 1; cannam@0: desc.defaultValue = 0; cannam@0: desc.valueNames.push_back("Equal Gain"); cannam@0: desc.valueNames.push_back("Equal Area"); cannam@0: list.push_back(desc); cannam@0: } cannam@0: cannam@14: if (m_xtFeature == XTRACT_SPECTRUM) { cannam@14: cannam@14: desc.identifier = "spectrumtype"; cannam@14: desc.name = "Type"; cannam@14: desc.minValue = 0; cannam@14: desc.maxValue = 3; cannam@14: desc.defaultValue = int(XTRACT_MAGNITUDE_SPECTRUM); cannam@14: desc.isQuantized = true; cannam@14: desc.quantizeStep = 1; cannam@14: desc.valueNames.push_back("Magnitude Spectrum"); cannam@14: desc.valueNames.push_back("Log Magnitude Spectrum"); cannam@14: desc.valueNames.push_back("Power Spectrum"); cannam@14: desc.valueNames.push_back("Log Power Spectrum"); cannam@14: list.push_back(desc); cannam@14: cannam@14: desc.identifier = "dc"; cannam@14: desc.name = "Include DC"; cannam@14: desc.maxValue = 1; cannam@14: desc.defaultValue = 0; cannam@14: desc.valueNames.clear(); cannam@14: list.push_back(desc); cannam@14: cannam@14: desc.identifier = "normalise"; cannam@14: desc.name = "Normalise"; cannam@14: list.push_back(desc); cannam@14: } cannam@14: cannam@0: if (needPeakThreshold()) { cannam@0: cannam@10: desc.identifier = "peak-threshold"; cannam@2: desc.name = "Peak Threshold"; cannam@0: desc.minValue = 0; cannam@0: desc.maxValue = 100; cannam@1: desc.defaultValue = 10; /* Threshold as % of maximum peak found */ cannam@0: desc.isQuantized = false; cannam@0: desc.valueNames.clear(); cannam@0: desc.unit = "%"; cannam@0: list.push_back(desc); cannam@0: cannam@1: } cannam@1: cannam@1: if (needRolloffThreshold()) { cannam@0: cannam@10: desc.identifier = "rolloff-threshold"; cannam@2: desc.name = "Rolloff Threshold"; cannam@0: desc.minValue = 0; cannam@0: desc.maxValue = 100; cannam@1: desc.defaultValue = 90; /* Freq below which 90% of energy is */ cannam@0: desc.isQuantized = false; cannam@0: desc.valueNames.clear(); cannam@0: desc.unit = "%"; cannam@0: list.push_back(desc); cannam@1: cannam@1: } cannam@1: cannam@1: if (needHarmonicThreshold()) { cannam@1: cannam@10: desc.identifier = "harmonic-threshold"; cannam@2: desc.name = "Harmonic Threshold"; cannam@1: desc.minValue = 0; cannam@1: desc.maxValue = 1.0; cannam@1: desc.defaultValue = .1; /* Distance from nearesst harmonic number */ cannam@1: desc.isQuantized = false; cannam@1: desc.valueNames.clear(); cannam@1: desc.unit = ""; cannam@1: list.push_back(desc); cannam@0: } cannam@0: cannam@0: return list; cannam@0: } cannam@0: cannam@0: float cannam@0: XTractPlugin::getParameter(string param) const cannam@0: { cannam@1: if (m_xtFeature == XTRACT_MFCC) { cannam@0: if (param == "minfreq") return m_minFreq; cannam@0: if (param == "maxfreq") return m_maxFreq; cannam@0: if (param == "bands") return m_coeffCount; cannam@9: if (param == "lowestcoef") return m_lowestCoef; cannam@9: if (param == "highestcoef") return m_highestCoef; cannam@0: if (param == "style") return m_mfccStyle; cannam@0: } cannam@0: cannam@14: if (m_xtFeature == XTRACT_SPECTRUM) { cannam@14: if (param == "spectrumtype") return m_spectrumType; cannam@14: if (param == "dc") return m_dc; cannam@14: if (param == "normalise") return m_normalise; cannam@14: } cannam@14: cannam@10: if (param == "peak-threshold") return m_peakThreshold; cannam@10: if (param == "rolloff-threshold") return m_rolloffThreshold; cannam@10: if (param == "harmonic-threshold") return m_harmonicThreshold; cannam@0: cannam@0: return 0.f; cannam@0: } cannam@0: cannam@0: void cannam@0: XTractPlugin::setParameter(string param, float value) cannam@0: { cannam@1: if (m_xtFeature == XTRACT_MFCC) { cannam@0: if (param == "minfreq") m_minFreq = value; cannam@0: else if (param == "maxfreq") m_maxFreq = value; cannam@14: else if (param == "bands") m_coeffCount = int(value + .1); cannam@9: else if (param == "lowestcoef"){ cannam@14: m_lowestCoef = int(value + .1); cannam@9: if(m_lowestCoef >= m_coeffCount) m_lowestCoef = m_coeffCount - 1; cannam@9: if(m_lowestCoef > m_highestCoef) m_lowestCoef = m_highestCoef; cannam@9: } cannam@9: else if (param == "highestcoef"){ cannam@14: m_highestCoef = int(value + .1); cannam@9: if(m_highestCoef >= m_coeffCount) m_highestCoef = m_coeffCount - 1; cannam@9: if(m_highestCoef < m_lowestCoef) m_highestCoef = m_lowestCoef; cannam@9: } cannam@14: else if (param == "style") m_mfccStyle = int(value + .1); cannam@14: } cannam@14: cannam@14: if (m_xtFeature == XTRACT_SPECTRUM) { cannam@14: if (param == "spectrumtype") m_spectrumType = int(value + .1); cannam@14: if (param == "dc") m_dc = int(value + .1); cannam@14: if (param == "normalise") m_normalise = int(value + .1); cannam@0: } cannam@0: cannam@10: if (param == "peak-threshold") m_peakThreshold = value; cannam@10: if (param == "rolloff-threshold") m_rolloffThreshold = value; cannam@10: if (param == "harmonic-threshold") m_harmonicThreshold = value; cannam@0: } cannam@0: cannam@0: XTractPlugin::OutputList cannam@0: XTractPlugin::getOutputDescriptors() const cannam@0: { cannam@13: if (m_outputDescriptors.empty()) { cannam@13: setupOutputDescriptors(); cannam@13: } cannam@0: return m_outputDescriptors; cannam@0: } cannam@0: cannam@0: void cannam@0: XTractPlugin::setupOutputDescriptors() const cannam@0: { cannam@0: OutputDescriptor d; cannam@1: const xtract_function_descriptor_t *xtFd = xtDescriptor(); cannam@2: d.identifier = getIdentifier(); cannam@2: d.name = getName(); cannam@2: d.description = getDescription(); cannam@0: d.unit = ""; cannam@0: d.hasFixedBinCount = true; cannam@0: d.binCount = m_outputBinCount; cannam@0: d.hasKnownExtents = false; cannam@0: d.isQuantized = false; cannam@0: d.sampleType = OutputDescriptor::OneSamplePerStep; cannam@0: cannam@9: if (xtFd->is_scalar){ cannam@1: switch(xtFd->result.scalar.unit){ cannam@1: case XTRACT_HERTZ: d.unit = "Hz"; break; cannam@1: case XTRACT_DBFS: d.unit = "dB"; break; cannam@1: default: d.unit = ""; break; cannam@1: } cannam@1: } cannam@1: else { cannam@1: if (xtFd->result.vector.format == XTRACT_SPECTRAL){ cannam@0: cannam@1: d.binCount /= 2; cannam@2: d.identifier = "amplitudes"; cannam@2: d.name = "Peak Amplitudes"; cannam@2: d.description = ""; cannam@1: } cannam@1: } cannam@0: cannam@0: m_outputDescriptors.push_back(d); cannam@0: } cannam@0: cannam@0: bool cannam@0: XTractPlugin::needPeakThreshold() const cannam@0: { cannam@1: const xtract_function_descriptor_t *xtFd = xtDescriptor(); cannam@0: cannam@1: if(m_xtFeature == XTRACT_PEAK_SPECTRUM || cannam@1: xtFd->data.format == XTRACT_SPECTRAL_PEAKS || cannam@1: xtFd->data.format == XTRACT_SPECTRAL_PEAKS_MAGNITUDES || cannam@1: needHarmonicThreshold()) cannam@1: return true; cannam@1: else return false; cannam@1: } cannam@1: cannam@1: bool cannam@1: XTractPlugin::needHarmonicThreshold() const cannam@1: { cannam@1: const xtract_function_descriptor_t *xtFd = xtDescriptor(); cannam@1: cannam@1: if(m_xtFeature == XTRACT_HARMONIC_SPECTRUM || cannam@1: xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_FREQUENCIES || cannam@1: m_xtFeature == XTRACT_NOISINESS || cannam@1: xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_MAGNITUDES) cannam@1: return true; cannam@1: else return false; cannam@1: } cannam@1: cannam@1: bool cannam@1: XTractPlugin::needRolloffThreshold() const cannam@1: { cannam@1: if(m_xtFeature == XTRACT_ROLLOFF) cannam@1: return true; cannam@1: else cannam@1: return false; cannam@0: } cannam@0: cannam@0: XTractPlugin::FeatureSet cannam@0: XTractPlugin::process(const float *const *inputBuffers, cannam@0: Vamp::RealTime timestamp) cannam@0: { cannam@13: if (m_outputDescriptors.empty()) { cannam@13: setupOutputDescriptors(); cannam@13: } cannam@0: cannam@14: int rbs = cannam@14: // Add 2 here to accommodate extra data for spectrum with DC cannam@14: 2 + (m_outputBinCount > m_blockSize ? m_outputBinCount : m_blockSize); cannam@0: if (!m_resultBuffer) { Chris@34: m_resultBuffer = new double[rbs]; cannam@0: } cannam@0: cannam@1: int i; cannam@1: cannam@1: for (i = 0; i < rbs; ++i) m_resultBuffer[i] = 0.f; cannam@1: cannam@1: int N = m_blockSize, M = N >> 1; Chris@34: Chris@34: const double *data = 0; Chris@34: double *input_d = new double[N]; Chris@34: for (int i = 0; i < N; ++i) { Chris@34: input_d[i] = inputBuffers[0][i]; Chris@34: } Chris@34: Chris@34: double *fft_temp = 0, *data_temp = 0; cannam@0: void *argv = 0; cannam@1: bool isSpectral = false; cannam@1: xtract_function_descriptor_t *xtFd = xtDescriptor(); cannam@0: cannam@0: FeatureSet fs; cannam@0: cannam@1: switch (xtFd->data.format) { cannam@1: case XTRACT_AUDIO_SAMPLES: Chris@34: data = input_d; cannam@1: break; cannam@1: case XTRACT_SPECTRAL: cannam@1: default: cannam@1: // All the rest are derived from the spectrum cannam@1: // Need same format as would be output by xtract_spectrum Chris@34: double q = m_inputSampleRate / N; Chris@34: fft_temp = new double[N]; cannam@1: for (int n = 1; n < N/2; ++n) { Chris@34: fft_temp[n] = sqrt(input_d[n*2] * Chris@34: input_d[n*2] + input_d[n*2+1] * Chris@34: input_d[n*2+1]) / N; cannam@1: fft_temp[N-n] = (N/2 - n) * q; cannam@1: } Chris@34: fft_temp[0] = fabs(input_d[0]) / N; Chris@34: fft_temp[N/2] = fabs(input_d[N]) / N; cannam@1: data = &fft_temp[0]; cannam@1: isSpectral = true; cannam@1: break; cannam@0: } cannam@0: cannam@0: assert(m_outputBinCount > 0); cannam@0: Chris@34: double *result = m_resultBuffer; cannam@0: Chris@34: double argf[XTRACT_MAXARGS]; cannam@0: argv = &argf[0]; cannam@14: argf[0] = 0.f; // handy for some, e.g. lowest_value which has a threshold cannam@0: Chris@34: double mean, variance, sd, npartials, nharmonics; cannam@0: cannam@1: bool needSD, needVariance, needMean, needPeaks, cannam@1: needBarkCoefficients, needHarmonics, needF0, needSFM, needMax, cannam@1: needNumPartials, needNumHarmonics; cannam@0: cannam@1: int donor; cannam@0: cannam@1: needSD = needVariance = needMean = needPeaks = cannam@1: needBarkCoefficients = needF0 = needHarmonics = needSFM = needMax = cannam@1: needNumPartials = needNumHarmonics = 0; cannam@0: cannam@1: mean = variance = sd = npartials = nharmonics = 0.f; cannam@0: cannam@1: i = xtFd->argc; cannam@0: cannam@1: while(i--){ cannam@14: if (m_xtFeature == XTRACT_BARK_COEFFICIENTS) { cannam@14: /* "BARK_COEFFICIENTS is special because argc = BARK_BANDS" */ cannam@14: break; cannam@14: } cannam@1: donor = xtFd->argv.donor[i]; cannam@1: switch(donor){ cannam@1: case XTRACT_STANDARD_DEVIATION: cannam@1: case XTRACT_SPECTRAL_STANDARD_DEVIATION: cannam@1: needSD = 1; cannam@1: break; cannam@1: case XTRACT_VARIANCE: cannam@1: case XTRACT_SPECTRAL_VARIANCE: cannam@1: needVariance = 1; cannam@1: break; cannam@1: case XTRACT_MEAN: cannam@1: case XTRACT_SPECTRAL_MEAN: cannam@1: needMean = 1; cannam@1: break; cannam@1: case XTRACT_F0: cannam@1: case XTRACT_FAILSAFE_F0: cannam@1: needF0 = 1; cannam@1: break; cannam@1: case XTRACT_FLATNESS: cannam@1: needSFM = 1; cannam@1: case XTRACT_HIGHEST_VALUE: cannam@1: needMax = 1; cannam@1: break; cannam@1: } cannam@1: } cannam@1: cannam@1: if(needHarmonicThreshold() && m_xtFeature != XTRACT_HARMONIC_SPECTRUM) cannam@1: needHarmonics = needF0 = 1; cannam@1: cannam@1: if(needPeakThreshold() && m_xtFeature != XTRACT_PEAK_SPECTRUM) cannam@1: needPeaks = 1; cannam@1: cannam@1: if(xtFd->data.format == XTRACT_BARK_COEFFS && cannam@1: m_xtFeature != XTRACT_BARK_COEFFICIENTS){ cannam@1: needBarkCoefficients = 1; cannam@0: } cannam@0: cannam@0: if (needMean) { cannam@1: if(isSpectral) cannam@1: xtract_spectral_mean(data, N, 0, result); cannam@1: else cannam@1: xtract_mean(data, M, 0, result); cannam@0: mean = *result; cannam@0: *result = 0.f; cannam@0: } cannam@0: cannam@1: if (needVariance || needSD) { cannam@0: argf[0] = mean; cannam@1: if(isSpectral) cannam@1: xtract_spectral_variance(data, N, argv, result); cannam@1: else cannam@1: xtract_variance(data, M, argv, result); cannam@0: variance = *result; cannam@0: *result = 0.f; cannam@0: } cannam@0: cannam@0: if (needSD) { cannam@0: argf[0] = variance; cannam@1: if(isSpectral) cannam@1: xtract_spectral_standard_deviation(data, N, argv, result); cannam@1: else cannam@1: xtract_standard_deviation(data, M, argv, result); cannam@0: sd = *result; cannam@0: *result = 0.f; cannam@0: } cannam@0: cannam@1: if (needMax) { cannam@1: xtract_highest_value(data, M, argv, result); cannam@1: argf[1] = *result; cannam@1: *result = 0.f; cannam@1: } cannam@1: cannam@0: if (needSD) { cannam@0: argf[0] = mean; cannam@0: argf[1] = sd; cannam@0: } else if (needVariance) { cannam@0: argf[0] = variance; cannam@0: } else if (needMean) { cannam@0: argf[0] = mean; cannam@0: } cannam@0: cannam@0: // data should be now correct for all except: cannam@1: // XTRACT_SPECTRAL_CENTROID -- N/2 magnitude peaks and N/2 frequencies cannam@1: // TONALITY -- SFM cannam@0: // TRISTIMULUS_1/2/3 -- harmonic spectrum cannam@0: // ODD_EVEN_RATIO -- harmonic spectrum cannam@0: // LOUDNESS -- Bark coefficients cannam@1: // XTRACT_HARMONIC_SPECTRUM -- peak spectrum cannam@0: cannam@0: // argv should be now correct for all except: cannam@0: // cannam@1: // XTRACT_ROLLOFF -- (sr/N), threshold (%) cannam@1: // XTRACT_PEAK_SPECTRUM -- (sr / N), peak threshold (%) cannam@1: // XTRACT_HARMONIC_SPECTRUM -- f0, harmonic threshold cannam@1: // XTRACT_F0 -- samplerate cannam@1: // XTRACT_MFCC -- Mel filter coefficients cannam@1: // XTRACT_BARK_COEFFICIENTS -- Bark band limits cannam@1: // XTRACT_NOISINESS -- npartials, nharmonics. cannam@14: // XTRACT_SPECTRUM -- q, spectrum type, dc, normalise cannam@0: Chris@34: data_temp = new double[N]; cannam@1: cannam@1: if (m_xtFeature == XTRACT_ROLLOFF || cannam@9: m_xtFeature == XTRACT_PEAK_SPECTRUM || needPeaks) { cannam@1: argf[0] = m_inputSampleRate / N; cannam@1: if(m_xtFeature == XTRACT_ROLLOFF) cannam@1: argf[1] = m_rolloffThreshold; cannam@1: else cannam@1: argf[1] = m_peakThreshold; cannam@0: argv = &argf[0]; cannam@0: } cannam@0: cannam@14: if (m_xtFeature == XTRACT_SPECTRUM) { cannam@14: argf[0] = 0; // xtract_spectrum will calculate this for us cannam@14: argf[1] = m_spectrumType; cannam@14: argf[2] = m_dc; cannam@14: argf[3] = m_normalise; cannam@14: argv = &argf[0]; cannam@14: } cannam@14: cannam@0: if (needPeaks) { cannam@1: //We only read in the magnitudes (M) cannam@1: /*int rv = */ xtract_peak_spectrum(data, M, argv, result); cannam@0: for (int n = 0; n < N; ++n) { cannam@1: data_temp[n] = result[n]; cannam@0: result[n] = 0.f; cannam@0: } cannam@0: // rv not trustworthy cannam@0: // if (rv != SUCCESS) { cannam@0: // cerr << "ERROR: XTractPlugin::process: xtract_peaks failed (error code = " << rv << ")" << endl; cannam@0: // goto done; cannam@0: // } cannam@0: } cannam@0: cannam@1: if (needNumPartials) { cannam@1: xtract_nonzero_count(data_temp, M, NULL, &npartials); cannam@1: } cannam@1: cannam@1: if (needF0 || m_xtFeature == XTRACT_FAILSAFE_F0 || cannam@1: m_xtFeature == XTRACT_F0) { cannam@1: argf[0] = m_inputSampleRate; cannam@1: argv = &argf[0]; cannam@1: } cannam@1: cannam@1: if (needF0) { Chris@34: xtract_failsafe_f0(&input_d[0], N, (void *)&m_inputSampleRate, result); cannam@1: argf[0] = *result; cannam@1: argv = &argf[0]; cannam@1: } cannam@1: cannam@1: if (needSFM) { cannam@1: xtract_flatness(data, N >> 1, 0, &argf[0]); cannam@1: argv = &argf[0]; cannam@1: } cannam@1: cannam@1: if (needHarmonics || m_xtFeature == XTRACT_HARMONIC_SPECTRUM){ cannam@1: argf[1] = m_harmonicThreshold; cannam@1: } cannam@1: cannam@1: if (needHarmonics){ cannam@1: xtract_harmonic_spectrum(data_temp, N, argv, result); cannam@1: for (int n = 0; n < N; ++n) { cannam@1: data_temp[n] = result[n]; cannam@1: result[n] = 0.f; cannam@1: } cannam@1: } cannam@1: cannam@1: if (needNumHarmonics) { cannam@1: xtract_nonzero_count(data_temp, M, NULL, &nharmonics); cannam@1: } cannam@1: cannam@1: if (m_xtFeature == XTRACT_NOISINESS) { cannam@1: cannam@1: argf[0] = nharmonics; cannam@1: argf[1] = npartials; cannam@1: argv = &argf[0]; cannam@1: cannam@1: } cannam@1: cannam@1: if (needBarkCoefficients || m_xtFeature == XTRACT_BARK_COEFFICIENTS) { cannam@1: argv = &m_barkBandLimits[0]; cannam@1: } cannam@1: cannam@1: xtract_mel_filter mfccFilterBank; cannam@1: if (m_xtFeature == XTRACT_MFCC) { cannam@1: mfccFilterBank.n_filters = m_coeffCount; cannam@1: mfccFilterBank.filters = m_mfccFilters; cannam@1: argv = &mfccFilterBank; cannam@1: } cannam@1: cannam@0: if (needBarkCoefficients) { cannam@1: cannam@1: /*int rv = */ xtract_bark_coefficients(data, 0, argv, data_temp); cannam@0: // if (rv != SUCCESS) { cannam@0: // cerr << "ERROR: XTractPlugin::process: xtract_bark_coefficients failed (error code = " << rv << ")" << endl; cannam@0: // goto done; cannam@0: // } cannam@1: data = &data_temp[0]; cannam@0: argv = 0; cannam@0: } cannam@1: cannam@1: if (xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_FREQUENCIES) { cannam@0: cannam@1: N = M; cannam@1: data = &data_temp[N]; cannam@0: cannam@1: } else if (xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_MAGNITUDES) { cannam@0: cannam@1: N = M; cannam@1: data = &data_temp[0]; cannam@1: cannam@1: } cannam@0: cannam@1: // If we only want spectral magnitudes, use first half of the input array cannam@1: else if(xtFd->data.format == XTRACT_SPECTRAL_MAGNITUDES || cannam@1: xtFd->data.format == XTRACT_SPECTRAL_PEAKS_MAGNITUDES || cannam@1: xtFd->data.format == XTRACT_ARBITRARY_SERIES) { cannam@1: N = M; cannam@1: } cannam@1: cannam@1: else if(xtFd->data.format == XTRACT_BARK_COEFFS) { cannam@1: cannam@1: N = XTRACT_BARK_BANDS - 1; /* Because our SR is 44100 (< 54000)*/ cannam@1: } cannam@1: cannam@1: if (needPeaks && !needHarmonics) { cannam@1: cannam@1: data = &data_temp[0]; cannam@1: cannam@0: } cannam@0: cannam@0: // now the main result cannam@0: xtract[m_xtFeature](data, N, argv, result); cannam@0: cannam@1: //haveResult: cannam@1: // { cannam@0: int index = 0; cannam@0: cannam@0: for (size_t output = 0; output < m_outputDescriptors.size(); ++output) { cannam@0: cannam@0: Feature feature; cannam@0: feature.hasTimestamp = false; cannam@0: bool good = true; cannam@0: cannam@0: for (size_t n = 0; n < m_outputDescriptors[output].binCount; ++n) { Chris@34: double value = m_resultBuffer[index + m_lowestCoef]; cannam@0: if (isnan(value) || isinf(value)) { cannam@0: good = false; cannam@0: index += (m_outputDescriptors[output].binCount - n); cannam@0: break; cannam@0: } cannam@0: feature.values.push_back(value); cannam@0: ++index; cannam@0: } cannam@13: cannam@0: if (good) fs[output].push_back(feature); cannam@0: } cannam@1: // } cannam@0: cannam@1: //done: cannam@1: delete[] fft_temp; cannam@1: delete[] data_temp; Chris@34: delete[] input_d; cannam@0: cannam@3: // cerr << "XTractPlugin::process returning" << endl; cannam@0: cannam@0: return fs; cannam@0: } cannam@0: cannam@0: XTractPlugin::FeatureSet cannam@0: XTractPlugin::getRemainingFeatures() cannam@0: { cannam@0: return FeatureSet(); cannam@0: } cannam@0: