Mercurial > hg > match-vamp
view src/FeatureExtractor.cpp @ 74:b9aa663a607b refactors
Pull out feature extractor calls from Matcher, remove MatchFeeder, have only the feeder-from-features and use that in MatchVampPlugin
author | Chris Cannam |
---|---|
date | Wed, 19 Nov 2014 11:59:03 +0000 |
parents | 15a7fdc02c58 |
children | 593054bf6476 |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    Vamp feature extraction plugin using the MATCH audio alignment
    algorithm.

    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#include "FeatureExtractor.h"

#include <iostream>

#include <cstdlib>
#include <cassert>
#include <cmath>

using namespace std;

/**
 * Construct an extractor for the given parameters: works out the
 * feature size, zeroes the "previous frame" history used for spectral
 * difference, and builds the FFT-bin-to-feature-bin map.
 */
FeatureExtractor::FeatureExtractor(Parameters parameters) :
    m_params(parameters),
    m_ltAverage(0)
{
    m_featureSize = getFeatureSizeFor(parameters);
    m_prevFrame = vector<double>(m_featureSize, 0.0);

    makeFreqMap();
}

/**
 * Return the number of bins in a feature vector for the given
 * parameters: 13 when the chroma frequency map is in use, 84
 * otherwise.
 */
int
FeatureExtractor::getFeatureSizeFor(Parameters parameters)
{
    if (parameters.useChromaFrequencyMap) {
        return 13;
    } else {
        return 84;
    }
}

/**
 * (Re)allocate m_freqMap with one entry per FFT bin in the range
 * 0..fftSize/2 inclusive, and fill it using either the chroma or the
 * standard mapping depending on the parameters.
 */
void
FeatureExtractor::makeFreqMap()
{
    m_freqMap = vector<int>(m_params.fftSize / 2 + 1, 0);

    if (m_params.useChromaFrequencyMap) {
#ifdef DEBUG_MATCHER
        cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl;
#endif
        makeChromaFrequencyMap();
    } else {
#ifdef DEBUG_MATCHER
        cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl;
#endif
        makeStandardFrequencyMap();
    }
}

/**
 * Fill m_freqMap with the standard mapping: FFT bins up to and
 * including the crossover bin map one-to-one onto feature bins; bins
 * above it are mapped by rounded MIDI pitch, offset so that the
 * mapping continues from the crossover bin, and clamped to the last
 * feature bin.
 */
void
FeatureExtractor::makeStandardFrequencyMap()
{
    double binWidth = m_params.sampleRate / m_params.fftSize;
    int crossoverBin = static_cast<int>(2 / (pow(2, 1/12.0) - 1));
    int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/
                              log(2.0) * 12 + 69);

    // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth;

    // m_freqMap only has entries for bins 0..fftSize/2, so the linear
    // section must not run past that when the FFT is very short.
    const int topBin = m_params.fftSize / 2;
    const int linearLimit = (crossoverBin < topBin ? crossoverBin : topBin);

    int i = 0;
    while (i <= linearLimit) {
        m_freqMap[i] = i;
        ++i;
    }

    while (i <= topBin) {
        double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69;
        if (midi > 127) midi = 127;
        int target = crossoverBin + lrint(midi) - crossoverMidi;
        if (target >= m_featureSize) target = m_featureSize - 1;
        m_freqMap[i++] = target;
    }
}

/**
 * Fill m_freqMap with the chroma mapping: FFT bins up to and
 * including the crossover bin all map to feature bin 0; each bin
 * above it maps to one of feature bins 1..12 according to its rounded
 * MIDI pitch modulo 12.
 */
void
FeatureExtractor::makeChromaFrequencyMap()
{
    double binWidth = m_params.sampleRate / m_params.fftSize;
    int crossoverBin = static_cast<int>(1 / (pow(2, 1/12.0) - 1));

    // As in the standard map, never write beyond bin fftSize/2.
    const int topBin = m_params.fftSize / 2;
    const int lowLimit = (crossoverBin < topBin ? crossoverBin : topBin);

    int i = 0;
    while (i <= lowLimit) {
        m_freqMap[i++] = 0;
    }

    while (i <= topBin) {
        double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69;
        // The % operator keeps the sign of its left operand, so a
        // negative MIDI value (possible when bins are very narrow)
        // would otherwise produce a non-positive feature index.
        long pitchClass = lrint(midi) % 12;
        if (pitchClass < 0) pitchClass += 12;
        m_freqMap[i++] = static_cast<int>(pitchClass) + 1;
    }
}

/**
 * Compute a feature vector from one frequency-domain frame supplied
 * as separate real and imaginary parts.
 *
 * Both vectors must contain at least fftSize/2 + 1 values. The
 * squared magnitude of each bin is accumulated into the feature bin
 * selected by m_freqMap, and the result is handed to postProcess()
 * together with an RMS-style level for the frame.
 */
vector<double>
FeatureExtractor::process(const vector<double> &real, const vector<double> &imag)
{
    const int topBin = m_params.fftSize / 2;

    // Bins 0..topBin inclusive are read from both inputs below.
    assert(static_cast<int>(real.size()) > topBin);
    assert(static_cast<int>(imag.size()) > topBin);

    vector<double> frame(m_featureSize, 0.0);

    double rms = 0;
    for (int i = 0; i <= topBin; i++) {
        double mag = real[i] * real[i] + imag[i] * imag[i];
        rms += mag;
        frame[m_freqMap[i]] += mag;
    }
    rms = sqrt(rms / (m_params.fftSize/2));

    return postProcess(frame, rms);
}

/**
 * Compute a feature vector from one frequency-domain frame supplied
 * as interleaved floats (real part at index 2*i, imaginary part at
 * index 2*i+1).
 *
 * The buffer is read for bins 0..fftSize/2 inclusive, so it must hold
 * at least fftSize + 2 floats. Processing is otherwise the same as
 * for the vector overload.
 */
vector<double>
FeatureExtractor::process(const float *cframe)
{
    const int topBin = m_params.fftSize / 2;

    vector<double> frame(m_featureSize, 0.0);

    double rms = 0;
    for (int i = 0; i <= topBin; i++) {
        double mag = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1];
        rms += mag;
        frame[m_freqMap[i]] += mag;
    }
    rms = sqrt(rms / (m_params.fftSize/2));

    return postProcess(frame, rms);
}

/**
 * Turn a mapped energy frame into the final feature vector.
 *
 * If spectral difference is enabled, each feature bin is the
 * half-wave rectified increase over the previous frame; otherwise it
 * is a copy of the frame. The long-term average energy is updated
 * from the frame's total energy using the configured decay. The
 * feature is then zeroed if rms is at or below the silence threshold,
 * or else normalised according to the configured frame normalisation.
 * Finally the frame is remembered as the previous frame.
 *
 * @param frame energy per feature bin; read for indices
 *              0..m_featureSize-1
 * @param rms   level of the frame, compared against the silence
 *              threshold
 * @return the processed feature vector, of size m_featureSize
 */
vector<double>
FeatureExtractor::postProcess(const vector<double> &frame, double rms)
{
    vector<double> feature(m_featureSize, 0.0);

    double totalEnergy = 0;

    if (m_params.useSpectralDifference) {
        for (int i = 0; i < m_featureSize; i++) {
            totalEnergy += frame[i];
            if (frame[i] > m_prevFrame[i]) {
                feature[i] = frame[i] - m_prevFrame[i];
            } else {
                feature[i] = 0;
            }
        }
    } else {
        for (int i = 0; i < m_featureSize; i++) {
            feature[i] = frame[i];
            totalEnergy += feature[i];
        }
    }

    // A long-term average of exactly zero means "not yet initialised":
    // seed it from the current frame rather than decaying towards it.
    if (m_ltAverage == 0) {
        m_ltAverage = totalEnergy;
    } else {
        double decay = m_params.decay;
        m_ltAverage = m_ltAverage * decay + totalEnergy * (1.0 - decay);
    }

    if (rms <= m_params.silenceThreshold) {
        for (int i = 0; i < m_featureSize; i++) {
            feature[i] = 0;
        }
    } else if (m_params.frameNorm == NormaliseFrameToSum1) {
        // Skip the division when there is no energy to normalise by;
        // dividing would fill the feature with NaN or infinity.
        if (totalEnergy != 0) {
            for (int i = 0; i < m_featureSize; i++) {
                feature[i] /= totalEnergy;
            }
        }
    } else if (m_params.frameNorm == NormaliseFrameToLTAverage) {
        // Same guard for the long-term average.
        if (m_ltAverage != 0) {
            for (int i = 0; i < m_featureSize; i++) {
                feature[i] /= m_ltAverage;
            }
        }
    }

    m_prevFrame = frame;
    return feature;
}