Chris@37: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@37: Chris@37: /* Chris@37: Vamp feature extraction plugin using the MATCH audio alignment Chris@37: algorithm. Chris@37: Chris@37: Centre for Digital Music, Queen Mary, University of London. Chris@37: This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. Chris@37: Chris@37: This program is free software; you can redistribute it and/or Chris@37: modify it under the terms of the GNU General Public License as Chris@37: published by the Free Software Foundation; either version 2 of the Chris@37: License, or (at your option) any later version. See the file Chris@37: COPYING included with this distribution for more information. Chris@37: */ Chris@37: Chris@37: #include "FeatureExtractor.h" Chris@37: Chris@37: #include Chris@37: Chris@37: #include Chris@37: #include Chris@37: #include Chris@37: Chris@37: using namespace std; Chris@37: Chris@166: #define DEBUG_FEATURE_EXTRACTOR 1 Chris@140: Chris@37: FeatureExtractor::FeatureExtractor(Parameters parameters) : Chris@103: m_params(parameters) Chris@37: { Chris@74: m_featureSize = getFeatureSizeFor(parameters); Chris@37: makeFreqMap(); Chris@140: Chris@140: #ifdef DEBUG_FEATURE_EXTRACTOR Chris@140: cerr << "*** FeatureExtractor: sampleRate = " << parameters.sampleRate Chris@140: << ", useChromaFrequencyMap = " << parameters.useChromaFrequencyMap Chris@140: << ", fftSize = " << parameters.fftSize << endl; Chris@140: #endif Chris@37: } Chris@37: Chris@74: int Chris@74: FeatureExtractor::getFeatureSizeFor(Parameters parameters) Chris@74: { Chris@74: if (parameters.useChromaFrequencyMap) { Chris@74: return 13; Chris@74: } else { Chris@74: return 84; Chris@74: } Chris@74: } Chris@74: Chris@37: void Chris@37: FeatureExtractor::makeFreqMap() Chris@37: { Chris@37: m_freqMap = vector(m_params.fftSize / 2 + 1, 0); Chris@37: Chris@37: if (m_params.useChromaFrequencyMap) { Chris@140: #ifdef DEBUG_FEATURE_EXTRACTOR Chris@37: cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl; Chris@37: #endif Chris@37: makeChromaFrequencyMap(); Chris@37: } else { Chris@140: #ifdef DEBUG_FEATURE_EXTRACTOR Chris@37: cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl; Chris@37: #endif Chris@37: makeStandardFrequencyMap(); Chris@37: } Chris@37: } Chris@37: Chris@37: void Chris@37: FeatureExtractor::makeStandardFrequencyMap() Chris@37: { Chris@169: // Our handling of the referenceFrequency parameter depends on the Chris@169: // frequency map in use. Chris@169: Chris@169: // With the chroma frequency map, we use referenceFrequency to set Chris@169: // up the chroma bin frequencies when constructing the map, and Chris@169: // then just follow the map (without having to refer to Chris@169: // referenceFrequency again) when we get the frequency-domain Chris@169: // audio. Chris@169: Chris@169: // With the standard frequency map, using referenceFrequency to Chris@169: // set up the map doesn't work so well -- it only really affects Chris@169: // the crossover frequency, and much of the useful information is Chris@169: // below that frequency. What we do instead is to ignore the Chris@169: // referenceFrequency when creating the map -- setting it up for Chris@169: // 440Hz -- and then use it to scale the individual Chris@169: // frequency-domain audio frames before applying the map to them. Chris@169: Chris@169: double refFreq = 440.; // See above -- *not* the parameter! Chris@37: double binWidth = m_params.sampleRate / m_params.fftSize; Chris@37: int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); Chris@159: int crossoverMidi = lrint(log(crossoverBin * binWidth / refFreq)/ Chris@37: log(2.0) * 12 + 69); Chris@163: Chris@37: int i = 0; Chris@37: while (i <= crossoverBin) { Chris@37: m_freqMap[i] = i; Chris@37: ++i; Chris@37: } Chris@37: Chris@37: while (i <= m_params.fftSize/2) { Chris@159: double midi = log(i * binWidth / refFreq) / log(2.0) * 12 + 69; Chris@37: if (midi > 127) midi = 127; Chris@40: int target = crossoverBin + lrint(midi) - crossoverMidi; Chris@40: if (target >= m_featureSize) target = m_featureSize - 1; Chris@40: m_freqMap[i++] = target; Chris@37: } Chris@166: Chris@166: #ifdef DEBUG_FEATURE_EXTRACTOR Chris@166: cerr << "FeatureExtractor: crossover bin is " << crossoverBin << " for midi " Chris@166: << crossoverMidi << endl; Chris@166: #endif Chris@37: } Chris@37: Chris@37: void Chris@37: FeatureExtractor::makeChromaFrequencyMap() Chris@37: { Chris@159: double refFreq = m_params.referenceFrequency; Chris@37: double binWidth = m_params.sampleRate / m_params.fftSize; Chris@37: int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1)); Chris@37: int i = 0; Chris@37: while (i <= crossoverBin) { Chris@37: m_freqMap[i++] = 0; Chris@37: } Chris@37: while (i <= m_params.fftSize/2) { Chris@159: double midi = log(i * binWidth / refFreq) / log(2.0) * 12 + 69; Chris@37: m_freqMap[i++] = (lrint(midi)) % 12 + 1; Chris@37: } Chris@37: } Chris@37: Chris@37: vector Chris@37: FeatureExtractor::process(const vector &real, const vector &imag) Chris@37: { Chris@169: vector mags(m_params.fftSize/2 + 1, 0.0); Chris@169: Chris@169: for (int i = 0; i <= m_params.fftSize/2; i++) { Chris@169: mags[i] = real[i] * real[i] + imag[i] * imag[i]; Chris@169: } Chris@169: Chris@169: return processMags(mags); Chris@169: } Chris@169: Chris@169: vector Chris@169: FeatureExtractor::process(const float *cframe) Chris@169: { Chris@169: vector mags(m_params.fftSize/2 + 1, 0.0); Chris@169: Chris@169: for (int i = 0; i <= m_params.fftSize/2; i++) { Chris@169: mags[i] = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1]; Chris@169: } Chris@169: Chris@169: return processMags(mags); Chris@169: } Chris@169: Chris@169: vector Chris@169: FeatureExtractor::processMags(const vector &mags) Chris@169: { Chris@37: vector frame(m_featureSize, 0.0); Chris@169: Chris@169: if (!m_params.useChromaFrequencyMap && Chris@169: (m_params.referenceFrequency != 440.)) { Chris@169: Chris@169: // See comment in makeStandardFrequencyMap above Chris@169: vector scaled = scaleMags(mags); Chris@169: Chris@169: for (int i = 0; i <= m_params.fftSize/2; i++) { Chris@169: frame[m_freqMap[i]] += scaled[i]; Chris@169: } Chris@169: Chris@169: } else { Chris@169: for (int i = 0; i <= m_params.fftSize/2; i++) { Chris@169: frame[m_freqMap[i]] += mags[i]; Chris@169: } Chris@37: } Chris@37: Chris@103: return frame; Chris@74: } Chris@74: Chris@74: vector Chris@169: FeatureExtractor::scaleMags(const vector &mags) Chris@74: { Chris@169: // Scale the pitch content in the given magnitude spectrum to Chris@169: // accommodate a difference in tuning frequency (between the 440Hz Chris@169: // reference and the actual tuning frequency of the input audio). Chris@169: // We only do this when not using chroma features -- see the Chris@169: // comment in makeStandardFrequencyMap() above. Chris@169: Chris@169: if (m_params.useChromaFrequencyMap) return mags; Chris@169: Chris@169: double ratio = 440. / m_params.referenceFrequency; Chris@169: Chris@169: int n = mags.size(); Chris@169: Chris@169: vector scaled(n, 0.0); Chris@169: Chris@169: for (int target = 0; target < n; ++target) { Chris@169: Chris@169: double source = target / ratio; Chris@169: Chris@169: int lower = int(source); Chris@169: int higher = lower + 1; Chris@169: Chris@169: double lowerProp = higher - source; Chris@169: double higherProp = source - lower; Chris@169: Chris@169: double value = 0.0; Chris@169: if (lower >= 0 && lower < n) { Chris@169: value += lowerProp * mags[lower]; Chris@169: } Chris@169: if (higher >= 0 && higher < n) { Chris@169: value += higherProp * mags[higher]; Chris@169: } Chris@169: Chris@169: scaled[target] = value; Chris@74: } Chris@74: Chris@169: return scaled; Chris@74: } Chris@74: