annotate src/FeatureExtractor.cpp @ 178:1440773da492 tuning-rescale

Merge from refactors branch
author Chris Cannam
date Mon, 16 Feb 2015 14:47:43 +0000
parents 001db4c32eb0 50bf5c5bca34
children d1bc89794cd4
rev   line source
Chris@37 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@37 2
Chris@37 3 /*
Chris@37 4 Vamp feature extraction plugin using the MATCH audio alignment
Chris@37 5 algorithm.
Chris@37 6
Chris@37 7 Centre for Digital Music, Queen Mary, University of London.
Chris@37 8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
Chris@37 9
Chris@37 10 This program is free software; you can redistribute it and/or
Chris@37 11 modify it under the terms of the GNU General Public License as
Chris@37 12 published by the Free Software Foundation; either version 2 of the
Chris@37 13 License, or (at your option) any later version. See the file
Chris@37 14 COPYING included with this distribution for more information.
Chris@37 15 */
Chris@37 16
Chris@37 17 #include "FeatureExtractor.h"
Chris@37 18
Chris@37 19 #include <iostream>
Chris@37 20
Chris@37 21 #include <cstdlib>
Chris@37 22 #include <cassert>
Chris@37 23 #include <cmath>
Chris@37 24
Chris@37 25 using namespace std;
Chris@37 26
Chris@174 27 //#define DEBUG_FEATURE_EXTRACTOR 1
Chris@140 28
Chris@37 29 FeatureExtractor::FeatureExtractor(Parameters parameters) :
Chris@103 30 m_params(parameters)
Chris@37 31 {
Chris@74 32 m_featureSize = getFeatureSizeFor(parameters);
Chris@37 33 makeFreqMap();
Chris@140 34
Chris@140 35 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@140 36 cerr << "*** FeatureExtractor: sampleRate = " << parameters.sampleRate
Chris@140 37 << ", useChromaFrequencyMap = " << parameters.useChromaFrequencyMap
Chris@140 38 << ", fftSize = " << parameters.fftSize << endl;
Chris@140 39 #endif
Chris@37 40 }
Chris@37 41
Chris@74 42 int
Chris@74 43 FeatureExtractor::getFeatureSizeFor(Parameters parameters)
Chris@74 44 {
Chris@74 45 if (parameters.useChromaFrequencyMap) {
Chris@74 46 return 13;
Chris@74 47 } else {
Chris@74 48 return 84;
Chris@74 49 }
Chris@74 50 }
Chris@74 51
Chris@37 52 void
Chris@37 53 FeatureExtractor::makeFreqMap()
Chris@37 54 {
Chris@37 55 m_freqMap = vector<int>(m_params.fftSize / 2 + 1, 0);
Chris@37 56
Chris@37 57 if (m_params.useChromaFrequencyMap) {
Chris@140 58 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@37 59 cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl;
Chris@37 60 #endif
Chris@37 61 makeChromaFrequencyMap();
Chris@37 62 } else {
Chris@140 63 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@37 64 cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl;
Chris@37 65 #endif
Chris@37 66 makeStandardFrequencyMap();
Chris@37 67 }
Chris@37 68 }
Chris@37 69
Chris@37 70 void
Chris@37 71 FeatureExtractor::makeStandardFrequencyMap()
Chris@37 72 {
Chris@169 73 // Our handling of the referenceFrequency parameter depends on the
Chris@169 74 // frequency map in use.
Chris@169 75
Chris@169 76 // With the chroma frequency map, we use referenceFrequency to set
Chris@169 77 // up the chroma bin frequencies when constructing the map, and
Chris@169 78 // then just follow the map (without having to refer to
Chris@169 79 // referenceFrequency again) when we get the frequency-domain
Chris@169 80 // audio.
Chris@169 81
Chris@169 82 // With the standard frequency map, using referenceFrequency to
Chris@169 83 // set up the map doesn't work so well -- it only really affects
Chris@169 84 // the crossover frequency, and much of the useful information is
Chris@169 85 // below that frequency. What we do instead is to ignore the
Chris@169 86 // referenceFrequency when creating the map -- setting it up for
Chris@169 87 // 440Hz -- and then use it to scale the individual
Chris@169 88 // frequency-domain audio frames before applying the map to them.
Chris@169 89
Chris@169 90 double refFreq = 440.; // See above -- *not* the parameter!
Chris@37 91 double binWidth = m_params.sampleRate / m_params.fftSize;
Chris@37 92 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1));
Chris@159 93 int crossoverMidi = lrint(log(crossoverBin * binWidth / refFreq)/
Chris@37 94 log(2.0) * 12 + 69);
Chris@163 95
Chris@37 96 int i = 0;
Chris@37 97 while (i <= crossoverBin) {
Chris@176 98 double freq = i * binWidth;
Chris@176 99 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
Chris@176 100 m_freqMap[i++] = -1;
Chris@176 101 } else {
Chris@176 102 m_freqMap[i] = i;
Chris@176 103 i++;
Chris@176 104 }
Chris@37 105 }
Chris@37 106
Chris@37 107 while (i <= m_params.fftSize/2) {
Chris@176 108 double freq = i * binWidth;
Chris@176 109 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
Chris@176 110 m_freqMap[i++] = -1;
Chris@176 111 } else {
Chris@176 112 double midi = log(freq / refFreq) / log(2.0) * 12 + 69;
Chris@176 113 if (midi > 127) midi = 127;
Chris@176 114 int target = crossoverBin + lrint(midi) - crossoverMidi;
Chris@176 115 if (target >= m_featureSize) target = m_featureSize - 1;
Chris@176 116 m_freqMap[i++] = target;
Chris@176 117 }
Chris@37 118 }
Chris@166 119
Chris@166 120 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@166 121 cerr << "FeatureExtractor: crossover bin is " << crossoverBin << " for midi "
Chris@166 122 << crossoverMidi << endl;
Chris@176 123 cerr << "FeatureExtractor: map is:" << endl;
Chris@176 124 for (i = 0; i <= m_params.fftSize/2; ++i) {
Chris@176 125 cerr << i << ": " << m_freqMap[i] << ", ";
Chris@176 126 }
Chris@176 127 cerr << endl;
Chris@166 128 #endif
Chris@37 129 }
Chris@37 130
Chris@37 131 void
Chris@37 132 FeatureExtractor::makeChromaFrequencyMap()
Chris@37 133 {
Chris@159 134 double refFreq = m_params.referenceFrequency;
Chris@37 135 double binWidth = m_params.sampleRate / m_params.fftSize;
Chris@37 136 int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1));
Chris@37 137 int i = 0;
Chris@37 138 while (i <= crossoverBin) {
Chris@176 139 double freq = i * binWidth;
Chris@176 140 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
Chris@176 141 m_freqMap[i++] = -1;
Chris@176 142 } else {
Chris@176 143 m_freqMap[i++] = 0;
Chris@176 144 }
Chris@37 145 }
Chris@37 146 while (i <= m_params.fftSize/2) {
Chris@176 147 double freq = i * binWidth;
Chris@176 148 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
Chris@176 149 m_freqMap[i++] = -1;
Chris@176 150 } else {
Chris@176 151 double midi = log(freq / refFreq) / log(2.0) * 12 + 69;
Chris@176 152 m_freqMap[i++] = (lrint(midi)) % 12 + 1;
Chris@176 153 }
Chris@37 154 }
Chris@37 155 }
Chris@37 156
Chris@37 157 vector<double>
Chris@37 158 FeatureExtractor::process(const vector<double> &real, const vector<double> &imag)
Chris@37 159 {
Chris@169 160 vector<double> mags(m_params.fftSize/2 + 1, 0.0);
Chris@169 161
Chris@169 162 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@169 163 mags[i] = real[i] * real[i] + imag[i] * imag[i];
Chris@169 164 }
Chris@169 165
Chris@169 166 return processMags(mags);
Chris@169 167 }
Chris@169 168
Chris@169 169 vector<double>
Chris@169 170 FeatureExtractor::process(const float *cframe)
Chris@169 171 {
Chris@169 172 vector<double> mags(m_params.fftSize/2 + 1, 0.0);
Chris@169 173
Chris@169 174 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@169 175 mags[i] = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1];
Chris@169 176 }
Chris@169 177
Chris@169 178 return processMags(mags);
Chris@169 179 }
Chris@169 180
Chris@169 181 vector<double>
Chris@169 182 FeatureExtractor::processMags(const vector<double> &mags)
Chris@169 183 {
Chris@37 184 vector<double> frame(m_featureSize, 0.0);
Chris@169 185
Chris@169 186 if (!m_params.useChromaFrequencyMap &&
Chris@169 187 (m_params.referenceFrequency != 440.)) {
Chris@169 188
Chris@169 189 // See comment in makeStandardFrequencyMap above
Chris@169 190 vector<double> scaled = scaleMags(mags);
Chris@169 191
Chris@169 192 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@178 193 int index = m_freqMap[i];
Chris@178 194 if (index >= 0) {
Chris@178 195 frame[index] += scaled[i];
Chris@178 196 }
Chris@169 197 }
Chris@169 198
Chris@169 199 } else {
Chris@169 200 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@178 201 int index = m_freqMap[i];
Chris@178 202 if (index >= 0) {
Chris@178 203 frame[index] += mags[i];
Chris@178 204 }
Chris@176 205 }
Chris@37 206 }
Chris@37 207
Chris@103 208 return frame;
Chris@74 209 }
Chris@74 210
Chris@74 211 vector<double>
Chris@169 212 FeatureExtractor::scaleMags(const vector<double> &mags)
Chris@74 213 {
Chris@169 214 // Scale the pitch content in the given magnitude spectrum to
Chris@169 215 // accommodate a difference in tuning frequency (between the 440Hz
Chris@169 216 // reference and the actual tuning frequency of the input audio).
Chris@169 217 // We only do this when not using chroma features -- see the
Chris@169 218 // comment in makeStandardFrequencyMap() above.
Chris@169 219
Chris@169 220 if (m_params.useChromaFrequencyMap) return mags;
Chris@169 221
Chris@169 222 double ratio = 440. / m_params.referenceFrequency;
Chris@169 223
Chris@169 224 int n = mags.size();
Chris@169 225
Chris@169 226 vector<double> scaled(n, 0.0);
Chris@169 227
Chris@169 228 for (int target = 0; target < n; ++target) {
Chris@169 229
Chris@169 230 double source = target / ratio;
Chris@169 231
Chris@169 232 int lower = int(source);
Chris@169 233 int higher = lower + 1;
Chris@169 234
Chris@169 235 double lowerProp = higher - source;
Chris@169 236 double higherProp = source - lower;
Chris@169 237
Chris@169 238 double value = 0.0;
Chris@169 239 if (lower >= 0 && lower < n) {
Chris@169 240 value += lowerProp * mags[lower];
Chris@176 241 }
Chris@169 242 if (higher >= 0 && higher < n) {
Chris@169 243 value += higherProp * mags[higher];
Chris@169 244 }
Chris@169 245
Chris@169 246 scaled[target] = value;
Chris@74 247 }
Chris@74 248
Chris@169 249 return scaled;
Chris@74 250 }
Chris@74 251