Chris@37: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@37: Chris@37: /* Chris@37: Vamp feature extraction plugin using the MATCH audio alignment Chris@37: algorithm. Chris@37: Chris@37: Centre for Digital Music, Queen Mary, University of London. Chris@236: Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary Chris@230: University of London, Copyright (c) 2014-2015 Tido GmbH. Chris@37: Chris@37: This program is free software; you can redistribute it and/or Chris@37: modify it under the terms of the GNU General Public License as Chris@37: published by the Free Software Foundation; either version 2 of the Chris@37: License, or (at your option) any later version. See the file Chris@37: COPYING included with this distribution for more information. Chris@37: */ Chris@37: Chris@37: #include "FeatureExtractor.h" Chris@37: Chris@37: #include Chris@37: Chris@37: #include Chris@37: #include Chris@37: #include Chris@37: Chris@37: using namespace std; Chris@37: Chris@174: //#define DEBUG_FEATURE_EXTRACTOR 1 Chris@140: Chris@37: FeatureExtractor::FeatureExtractor(Parameters parameters) : Chris@103: m_params(parameters) Chris@37: { Chris@74: m_featureSize = getFeatureSizeFor(parameters); Chris@37: makeFreqMap(); Chris@140: Chris@140: #ifdef DEBUG_FEATURE_EXTRACTOR Chris@140: cerr << "*** FeatureExtractor: sampleRate = " << parameters.sampleRate Chris@140: << ", useChromaFrequencyMap = " << parameters.useChromaFrequencyMap Chris@140: << ", fftSize = " << parameters.fftSize << endl; Chris@140: #endif Chris@37: } Chris@37: Chris@74: int Chris@74: FeatureExtractor::getFeatureSizeFor(Parameters parameters) Chris@74: { Chris@74: if (parameters.useChromaFrequencyMap) { Chris@74: return 13; Chris@74: } else { Chris@74: return 84; Chris@74: } Chris@74: } Chris@74: Chris@37: void Chris@37: FeatureExtractor::makeFreqMap() Chris@37: { Chris@37: m_freqMap = vector(m_params.fftSize / 2 + 1, 0); Chris@37: Chris@37: if (m_params.useChromaFrequencyMap) { Chris@140: #ifdef DEBUG_FEATURE_EXTRACTOR Chris@37: cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl; Chris@37: #endif Chris@37: makeChromaFrequencyMap(); Chris@37: } else { Chris@140: #ifdef DEBUG_FEATURE_EXTRACTOR Chris@37: cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl; Chris@37: #endif Chris@37: makeStandardFrequencyMap(); Chris@37: } Chris@37: } Chris@37: Chris@37: void Chris@37: FeatureExtractor::makeStandardFrequencyMap() Chris@37: { Chris@169: // Our handling of the referenceFrequency parameter depends on the Chris@169: // frequency map in use. Chris@169: Chris@169: // With the chroma frequency map, we use referenceFrequency to set Chris@169: // up the chroma bin frequencies when constructing the map, and Chris@169: // then just follow the map (without having to refer to Chris@169: // referenceFrequency again) when we get the frequency-domain Chris@169: // audio. Chris@169: Chris@169: // With the standard frequency map, using referenceFrequency to Chris@169: // set up the map doesn't work so well -- it only really affects Chris@169: // the crossover frequency, and much of the useful information is Chris@169: // below that frequency. What we do instead is to ignore the Chris@169: // referenceFrequency when creating the map -- setting it up for Chris@169: // 440Hz -- and then use it to scale the individual Chris@169: // frequency-domain audio frames before applying the map to them. Chris@169: Chris@169: double refFreq = 440.; // See above -- *not* the parameter! Chris@180: double binWidth = double(m_params.sampleRate) / m_params.fftSize; Chris@188: int crossoverBin = int(2 / (pow(2, 1/12.0) - 1)); Chris@180: int crossoverMidi = int(log(crossoverBin * binWidth / refFreq)/ Chris@180: log(2.0) * 12 + 69 + 0.5); Chris@163: Chris@37: int i = 0; Chris@37: while (i <= crossoverBin) { Chris@176: double freq = i * binWidth; Chris@176: if (freq < m_params.minFrequency || freq > m_params.maxFrequency) { Chris@176: m_freqMap[i++] = -1; Chris@176: } else { Chris@176: m_freqMap[i] = i; Chris@176: i++; Chris@176: } Chris@37: } Chris@37: Chris@37: while (i <= m_params.fftSize/2) { Chris@176: double freq = i * binWidth; Chris@176: if (freq < m_params.minFrequency || freq > m_params.maxFrequency) { Chris@176: m_freqMap[i++] = -1; Chris@176: } else { Chris@176: double midi = log(freq / refFreq) / log(2.0) * 12 + 69; Chris@176: if (midi > 127) midi = 127; Chris@180: int target = crossoverBin + int(midi + 0.5) - crossoverMidi; Chris@176: if (target >= m_featureSize) target = m_featureSize - 1; Chris@176: m_freqMap[i++] = target; Chris@176: } Chris@37: } Chris@166: Chris@166: #ifdef DEBUG_FEATURE_EXTRACTOR Chris@166: cerr << "FeatureExtractor: crossover bin is " << crossoverBin << " for midi " Chris@166: << crossoverMidi << endl; Chris@176: cerr << "FeatureExtractor: map is:" << endl; Chris@176: for (i = 0; i <= m_params.fftSize/2; ++i) { Chris@176: cerr << i << ": " << m_freqMap[i] << ", "; Chris@176: } Chris@176: cerr << endl; Chris@166: #endif Chris@37: } Chris@37: Chris@37: void Chris@37: FeatureExtractor::makeChromaFrequencyMap() Chris@37: { Chris@159: double refFreq = m_params.referenceFrequency; Chris@180: double binWidth = double(m_params.sampleRate) / m_params.fftSize; Chris@188: int crossoverBin = int(1 / (pow(2, 1/12.0) - 1)); Chris@37: int i = 0; Chris@37: while (i <= crossoverBin) { Chris@176: double freq = i * binWidth; Chris@176: if (freq < m_params.minFrequency || freq > m_params.maxFrequency) { Chris@176: m_freqMap[i++] = -1; Chris@176: } else { Chris@176: m_freqMap[i++] = 0; Chris@176: } Chris@37: } Chris@37: while (i <= m_params.fftSize/2) { Chris@176: double freq = i * binWidth; Chris@176: if (freq < m_params.minFrequency || freq > m_params.maxFrequency) { Chris@176: m_freqMap[i++] = -1; Chris@176: } else { Chris@176: double midi = log(freq / refFreq) / log(2.0) * 12 + 69; Chris@180: m_freqMap[i++] = (int(midi + 0.5)) % 12 + 1; Chris@176: } Chris@37: } Chris@37: } Chris@37: Chris@183: feature_t Chris@37: FeatureExtractor::process(const vector &real, const vector &imag) Chris@37: { Chris@184: vector mags(m_params.fftSize/2 + 1, 0.0); Chris@184: Chris@184: for (int i = 0; i <= m_params.fftSize/2; i++) { Chris@184: mags[i] = float(real[i] * real[i] + imag[i] * imag[i]); Chris@184: } Chris@184: Chris@184: return processMags(mags); Chris@184: } Chris@184: Chris@184: feature_t Chris@184: FeatureExtractor::process(const vector &real, const vector &imag) Chris@184: { Chris@184: vector mags(m_params.fftSize/2 + 1, 0.0); Chris@169: Chris@169: for (int i = 0; i <= m_params.fftSize/2; i++) { Chris@169: mags[i] = real[i] * real[i] + imag[i] * imag[i]; Chris@169: } Chris@169: Chris@169: return processMags(mags); Chris@169: } Chris@169: Chris@183: feature_t Chris@201: FeatureExtractor::process(const float *real, const float *imag) Chris@201: { Chris@201: vector mags(m_params.fftSize/2 + 1, 0.0); Chris@201: Chris@201: for (int i = 0; i <= m_params.fftSize/2; i++) { Chris@201: mags[i] = real[i] * real[i] + imag[i] * imag[i]; Chris@201: } Chris@201: Chris@201: return processMags(mags); Chris@201: } Chris@201: Chris@201: feature_t Chris@169: FeatureExtractor::process(const float *cframe) Chris@169: { Chris@184: vector mags(m_params.fftSize/2 + 1, 0.0); Chris@169: Chris@169: for (int i = 0; i <= m_params.fftSize/2; i++) { Chris@169: mags[i] = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1]; Chris@169: } Chris@169: Chris@169: return processMags(mags); Chris@169: } Chris@169: Chris@183: feature_t Chris@184: FeatureExtractor::processMags(const vector &mags) Chris@169: { Chris@183: feature_t frame(m_featureSize, 0.0); Chris@169: Chris@169: if (!m_params.useChromaFrequencyMap && Chris@169: (m_params.referenceFrequency != 440.)) { Chris@169: Chris@169: // See comment in makeStandardFrequencyMap above Chris@184: vector scaled = scaleMags(mags); Chris@169: Chris@169: for (int i = 0; i <= m_params.fftSize/2; i++) { Chris@178: int index = m_freqMap[i]; Chris@178: if (index >= 0) { Chris@178: frame[index] += scaled[i]; Chris@178: } Chris@169: } Chris@169: Chris@169: } else { Chris@169: for (int i = 0; i <= m_params.fftSize/2; i++) { Chris@178: int index = m_freqMap[i]; Chris@178: if (index >= 0) { Chris@178: frame[index] += mags[i]; Chris@178: } Chris@176: } Chris@37: } Chris@37: Chris@103: return frame; Chris@74: } Chris@74: Chris@184: vector Chris@184: FeatureExtractor::scaleMags(const vector &mags) Chris@74: { Chris@169: // Scale the pitch content in the given magnitude spectrum to Chris@169: // accommodate a difference in tuning frequency (between the 440Hz Chris@169: // reference and the actual tuning frequency of the input audio). Chris@169: // We only do this when not using chroma features -- see the Chris@169: // comment in makeStandardFrequencyMap() above. Chris@169: Chris@169: if (m_params.useChromaFrequencyMap) return mags; Chris@169: Chris@184: double ratio = 440.f / m_params.referenceFrequency; Chris@169: Chris@180: int n = static_cast(mags.size()); Chris@169: Chris@184: vector scaled(n, 0.0); Chris@169: Chris@169: for (int target = 0; target < n; ++target) { Chris@169: Chris@169: double source = target / ratio; Chris@169: Chris@169: int lower = int(source); Chris@169: int higher = lower + 1; Chris@169: Chris@169: double lowerProp = higher - source; Chris@169: double higherProp = source - lower; Chris@169: Chris@169: double value = 0.0; Chris@169: if (lower >= 0 && lower < n) { Chris@169: value += lowerProp * mags[lower]; Chris@176: } Chris@169: if (higher >= 0 && higher < n) { Chris@169: value += higherProp * mags[higher]; Chris@169: } Chris@169: Chris@184: scaled[target] = float(value); Chris@74: } Chris@74: Chris@169: return scaled; Chris@74: } Chris@74: