annotate src/FeatureExtractor.cpp @ 40:15a7fdc02c58 refactors

Merge from default branch
author Chris Cannam
date Thu, 13 Nov 2014 12:57:04 +0000
parents 8cce4e13ede3
children b9aa663a607b
rev   line source
Chris@37 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@37 2
Chris@37 3 /*
Chris@37 4 Vamp feature extraction plugin using the MATCH audio alignment
Chris@37 5 algorithm.
Chris@37 6
Chris@37 7 Centre for Digital Music, Queen Mary, University of London.
Chris@37 8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
Chris@37 9
Chris@37 10 This program is free software; you can redistribute it and/or
Chris@37 11 modify it under the terms of the GNU General Public License as
Chris@37 12 published by the Free Software Foundation; either version 2 of the
Chris@37 13 License, or (at your option) any later version. See the file
Chris@37 14 COPYING included with this distribution for more information.
Chris@37 15 */
Chris@37 16
Chris@37 17 #include "FeatureExtractor.h"
Chris@37 18
Chris@37 19 #include <iostream>
Chris@37 20
Chris@37 21 #include <cstdlib>
Chris@37 22 #include <cassert>
Chris@37 23 #include <cmath>
Chris@37 24
Chris@37 25 using namespace std;
Chris@37 26
Chris@37 27 FeatureExtractor::FeatureExtractor(Parameters parameters) :
Chris@37 28 m_params(parameters),
Chris@37 29 m_ltAverage(0)
Chris@37 30 {
Chris@37 31 if (m_params.useChromaFrequencyMap) {
Chris@37 32 m_featureSize = 13;
Chris@37 33 } else {
Chris@37 34 m_featureSize = 84;
Chris@37 35 }
Chris@37 36
Chris@37 37 m_prevFrame = vector<double>(m_featureSize, 0.0);
Chris@37 38
Chris@37 39 makeFreqMap();
Chris@37 40 }
Chris@37 41
Chris@37 42 void
Chris@37 43 FeatureExtractor::makeFreqMap()
Chris@37 44 {
Chris@37 45 m_freqMap = vector<int>(m_params.fftSize / 2 + 1, 0);
Chris@37 46
Chris@37 47 if (m_params.useChromaFrequencyMap) {
Chris@37 48 #ifdef DEBUG_MATCHER
Chris@37 49 cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl;
Chris@37 50 #endif
Chris@37 51 makeChromaFrequencyMap();
Chris@37 52 } else {
Chris@37 53 #ifdef DEBUG_MATCHER
Chris@37 54 cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl;
Chris@37 55 #endif
Chris@37 56 makeStandardFrequencyMap();
Chris@37 57 }
Chris@37 58 }
Chris@37 59
Chris@37 60 void
Chris@37 61 FeatureExtractor::makeStandardFrequencyMap()
Chris@37 62 {
Chris@37 63 double binWidth = m_params.sampleRate / m_params.fftSize;
Chris@37 64 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1));
Chris@37 65 int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/
Chris@37 66 log(2.0) * 12 + 69);
Chris@37 67
Chris@37 68 // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth;
Chris@37 69
Chris@37 70 int i = 0;
Chris@37 71 while (i <= crossoverBin) {
Chris@37 72 m_freqMap[i] = i;
Chris@37 73 ++i;
Chris@37 74 }
Chris@37 75
Chris@37 76 while (i <= m_params.fftSize/2) {
Chris@37 77 double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69;
Chris@37 78 if (midi > 127) midi = 127;
Chris@40 79 int target = crossoverBin + lrint(midi) - crossoverMidi;
Chris@40 80 if (target >= m_featureSize) target = m_featureSize - 1;
Chris@40 81 m_freqMap[i++] = target;
Chris@37 82 }
Chris@37 83 }
Chris@37 84
Chris@37 85 void
Chris@37 86 FeatureExtractor::makeChromaFrequencyMap()
Chris@37 87 {
Chris@37 88 double binWidth = m_params.sampleRate / m_params.fftSize;
Chris@37 89 int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1));
Chris@37 90 int i = 0;
Chris@37 91 while (i <= crossoverBin) {
Chris@37 92 m_freqMap[i++] = 0;
Chris@37 93 }
Chris@37 94 while (i <= m_params.fftSize/2) {
Chris@37 95 double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69;
Chris@37 96 m_freqMap[i++] = (lrint(midi)) % 12 + 1;
Chris@37 97 }
Chris@37 98 }
Chris@37 99
Chris@37 100 vector<double>
Chris@37 101 FeatureExtractor::process(const vector<double> &real, const vector<double> &imag)
Chris@37 102 {
Chris@37 103 vector<double> frame(m_featureSize, 0.0);
Chris@37 104
Chris@37 105 double rms = 0;
Chris@37 106 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@37 107 double mag = real[i] * real[i] + imag[i] * imag[i];
Chris@37 108 rms += mag;
Chris@37 109 frame[m_freqMap[i]] += mag;
Chris@37 110 }
Chris@37 111 rms = sqrt(rms / (m_params.fftSize/2));
Chris@37 112
Chris@37 113 vector<double> feature(m_featureSize, 0.0);
Chris@37 114
Chris@37 115 double totalEnergy = 0;
Chris@37 116 if (m_params.useSpectralDifference) {
Chris@37 117 for (int i = 0; i < m_featureSize; i++) {
Chris@37 118 totalEnergy += frame[i];
Chris@37 119 if (frame[i] > m_prevFrame[i]) {
Chris@37 120 feature[i] = frame[i] - m_prevFrame[i];
Chris@37 121 } else {
Chris@37 122 feature[i] = 0;
Chris@37 123 }
Chris@37 124 }
Chris@37 125 } else {
Chris@37 126 for (int i = 0; i < m_featureSize; i++) {
Chris@37 127 feature[i] = frame[i];
Chris@37 128 totalEnergy += feature[i];
Chris@37 129 }
Chris@37 130 }
Chris@37 131
Chris@37 132 if (m_ltAverage == 0) {
Chris@37 133 m_ltAverage = totalEnergy;
Chris@37 134 } else {
Chris@37 135 double decay = m_params.decay;
Chris@37 136 m_ltAverage = m_ltAverage * decay + totalEnergy * (1.0 - decay);
Chris@37 137 }
Chris@37 138
Chris@37 139 if (rms <= m_params.silenceThreshold) {
Chris@37 140 for (int i = 0; i < m_featureSize; i++) {
Chris@37 141 feature[i] = 0;
Chris@37 142 }
Chris@37 143 } else if (m_params.frameNorm == NormaliseFrameToSum1) {
Chris@37 144 for (int i = 0; i < m_featureSize; i++) {
Chris@37 145 feature[i] /= totalEnergy;
Chris@37 146 }
Chris@37 147 } else if (m_params.frameNorm == NormaliseFrameToLTAverage) {
Chris@37 148 for (int i = 0; i < m_featureSize; i++) {
Chris@37 149 feature[i] /= m_ltAverage;
Chris@37 150 }
Chris@37 151 }
Chris@37 152
Chris@37 153 m_prevFrame = frame;
Chris@37 154 return feature;
Chris@37 155 }
Chris@37 156