annotate src/FeatureExtractor.cpp @ 169:001db4c32eb0 tuning-rescale

Alternative handling of reference frequency parameter -- scale the whole spectrum, don't just adjust the semitone bins above the crossover freq
author Chris Cannam
date Thu, 05 Feb 2015 16:26:41 +0000
parents d23dad16d6f9
children 1440773da492
rev   line source
Chris@37 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@37 2
Chris@37 3 /*
Chris@37 4 Vamp feature extraction plugin using the MATCH audio alignment
Chris@37 5 algorithm.
Chris@37 6
Chris@37 7 Centre for Digital Music, Queen Mary, University of London.
Chris@37 8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
Chris@37 9
Chris@37 10 This program is free software; you can redistribute it and/or
Chris@37 11 modify it under the terms of the GNU General Public License as
Chris@37 12 published by the Free Software Foundation; either version 2 of the
Chris@37 13 License, or (at your option) any later version. See the file
Chris@37 14 COPYING included with this distribution for more information.
Chris@37 15 */
Chris@37 16
Chris@37 17 #include "FeatureExtractor.h"
Chris@37 18
Chris@37 19 #include <iostream>
Chris@37 20
Chris@37 21 #include <cstdlib>
Chris@37 22 #include <cassert>
Chris@37 23 #include <cmath>
Chris@37 24
Chris@37 25 using namespace std;
Chris@37 26
Chris@166 27 #define DEBUG_FEATURE_EXTRACTOR 1
Chris@140 28
Chris@37 29 FeatureExtractor::FeatureExtractor(Parameters parameters) :
Chris@103 30 m_params(parameters)
Chris@37 31 {
Chris@74 32 m_featureSize = getFeatureSizeFor(parameters);
Chris@37 33 makeFreqMap();
Chris@140 34
Chris@140 35 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@140 36 cerr << "*** FeatureExtractor: sampleRate = " << parameters.sampleRate
Chris@140 37 << ", useChromaFrequencyMap = " << parameters.useChromaFrequencyMap
Chris@140 38 << ", fftSize = " << parameters.fftSize << endl;
Chris@140 39 #endif
Chris@37 40 }
Chris@37 41
Chris@74 42 int
Chris@74 43 FeatureExtractor::getFeatureSizeFor(Parameters parameters)
Chris@74 44 {
Chris@74 45 if (parameters.useChromaFrequencyMap) {
Chris@74 46 return 13;
Chris@74 47 } else {
Chris@74 48 return 84;
Chris@74 49 }
Chris@74 50 }
Chris@74 51
Chris@37 52 void
Chris@37 53 FeatureExtractor::makeFreqMap()
Chris@37 54 {
Chris@37 55 m_freqMap = vector<int>(m_params.fftSize / 2 + 1, 0);
Chris@37 56
Chris@37 57 if (m_params.useChromaFrequencyMap) {
Chris@140 58 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@37 59 cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl;
Chris@37 60 #endif
Chris@37 61 makeChromaFrequencyMap();
Chris@37 62 } else {
Chris@140 63 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@37 64 cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl;
Chris@37 65 #endif
Chris@37 66 makeStandardFrequencyMap();
Chris@37 67 }
Chris@37 68 }
Chris@37 69
Chris@37 70 void
Chris@37 71 FeatureExtractor::makeStandardFrequencyMap()
Chris@37 72 {
Chris@169 73 // Our handling of the referenceFrequency parameter depends on the
Chris@169 74 // frequency map in use.
Chris@169 75
Chris@169 76 // With the chroma frequency map, we use referenceFrequency to set
Chris@169 77 // up the chroma bin frequencies when constructing the map, and
Chris@169 78 // then just follow the map (without having to refer to
Chris@169 79 // referenceFrequency again) when we get the frequency-domain
Chris@169 80 // audio.
Chris@169 81
Chris@169 82 // With the standard frequency map, using referenceFrequency to
Chris@169 83 // set up the map doesn't work so well -- it only really affects
Chris@169 84 // the crossover frequency, and much of the useful information is
Chris@169 85 // below that frequency. What we do instead is to ignore the
Chris@169 86 // referenceFrequency when creating the map -- setting it up for
Chris@169 87 // 440Hz -- and then use it to scale the individual
Chris@169 88 // frequency-domain audio frames before applying the map to them.
Chris@169 89
Chris@169 90 double refFreq = 440.; // See above -- *not* the parameter!
Chris@37 91 double binWidth = m_params.sampleRate / m_params.fftSize;
Chris@37 92 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1));
Chris@159 93 int crossoverMidi = lrint(log(crossoverBin * binWidth / refFreq)/
Chris@37 94 log(2.0) * 12 + 69);
Chris@163 95
Chris@37 96 int i = 0;
Chris@37 97 while (i <= crossoverBin) {
Chris@37 98 m_freqMap[i] = i;
Chris@37 99 ++i;
Chris@37 100 }
Chris@37 101
Chris@37 102 while (i <= m_params.fftSize/2) {
Chris@159 103 double midi = log(i * binWidth / refFreq) / log(2.0) * 12 + 69;
Chris@37 104 if (midi > 127) midi = 127;
Chris@40 105 int target = crossoverBin + lrint(midi) - crossoverMidi;
Chris@40 106 if (target >= m_featureSize) target = m_featureSize - 1;
Chris@40 107 m_freqMap[i++] = target;
Chris@37 108 }
Chris@166 109
Chris@166 110 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@166 111 cerr << "FeatureExtractor: crossover bin is " << crossoverBin << " for midi "
Chris@166 112 << crossoverMidi << endl;
Chris@166 113 #endif
Chris@37 114 }
Chris@37 115
Chris@37 116 void
Chris@37 117 FeatureExtractor::makeChromaFrequencyMap()
Chris@37 118 {
Chris@159 119 double refFreq = m_params.referenceFrequency;
Chris@37 120 double binWidth = m_params.sampleRate / m_params.fftSize;
Chris@37 121 int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1));
Chris@37 122 int i = 0;
Chris@37 123 while (i <= crossoverBin) {
Chris@37 124 m_freqMap[i++] = 0;
Chris@37 125 }
Chris@37 126 while (i <= m_params.fftSize/2) {
Chris@159 127 double midi = log(i * binWidth / refFreq) / log(2.0) * 12 + 69;
Chris@37 128 m_freqMap[i++] = (lrint(midi)) % 12 + 1;
Chris@37 129 }
Chris@37 130 }
Chris@37 131
Chris@37 132 vector<double>
Chris@37 133 FeatureExtractor::process(const vector<double> &real, const vector<double> &imag)
Chris@37 134 {
Chris@169 135 vector<double> mags(m_params.fftSize/2 + 1, 0.0);
Chris@169 136
Chris@169 137 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@169 138 mags[i] = real[i] * real[i] + imag[i] * imag[i];
Chris@169 139 }
Chris@169 140
Chris@169 141 return processMags(mags);
Chris@169 142 }
Chris@169 143
Chris@169 144 vector<double>
Chris@169 145 FeatureExtractor::process(const float *cframe)
Chris@169 146 {
Chris@169 147 vector<double> mags(m_params.fftSize/2 + 1, 0.0);
Chris@169 148
Chris@169 149 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@169 150 mags[i] = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1];
Chris@169 151 }
Chris@169 152
Chris@169 153 return processMags(mags);
Chris@169 154 }
Chris@169 155
Chris@169 156 vector<double>
Chris@169 157 FeatureExtractor::processMags(const vector<double> &mags)
Chris@169 158 {
Chris@37 159 vector<double> frame(m_featureSize, 0.0);
Chris@169 160
Chris@169 161 if (!m_params.useChromaFrequencyMap &&
Chris@169 162 (m_params.referenceFrequency != 440.)) {
Chris@169 163
Chris@169 164 // See comment in makeStandardFrequencyMap above
Chris@169 165 vector<double> scaled = scaleMags(mags);
Chris@169 166
Chris@169 167 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@169 168 frame[m_freqMap[i]] += scaled[i];
Chris@169 169 }
Chris@169 170
Chris@169 171 } else {
Chris@169 172 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@169 173 frame[m_freqMap[i]] += mags[i];
Chris@169 174 }
Chris@37 175 }
Chris@37 176
Chris@103 177 return frame;
Chris@74 178 }
Chris@74 179
Chris@74 180 vector<double>
Chris@169 181 FeatureExtractor::scaleMags(const vector<double> &mags)
Chris@74 182 {
Chris@169 183 // Scale the pitch content in the given magnitude spectrum to
Chris@169 184 // accommodate a difference in tuning frequency (between the 440Hz
Chris@169 185 // reference and the actual tuning frequency of the input audio).
Chris@169 186 // We only do this when not using chroma features -- see the
Chris@169 187 // comment in makeStandardFrequencyMap() above.
Chris@169 188
Chris@169 189 if (m_params.useChromaFrequencyMap) return mags;
Chris@169 190
Chris@169 191 double ratio = 440. / m_params.referenceFrequency;
Chris@169 192
Chris@169 193 int n = mags.size();
Chris@169 194
Chris@169 195 vector<double> scaled(n, 0.0);
Chris@169 196
Chris@169 197 for (int target = 0; target < n; ++target) {
Chris@169 198
Chris@169 199 double source = target / ratio;
Chris@169 200
Chris@169 201 int lower = int(source);
Chris@169 202 int higher = lower + 1;
Chris@169 203
Chris@169 204 double lowerProp = higher - source;
Chris@169 205 double higherProp = source - lower;
Chris@169 206
Chris@169 207 double value = 0.0;
Chris@169 208 if (lower >= 0 && lower < n) {
Chris@169 209 value += lowerProp * mags[lower];
Chris@169 210 }
Chris@169 211 if (higher >= 0 && higher < n) {
Chris@169 212 value += higherProp * mags[higher];
Chris@169 213 }
Chris@169 214
Chris@169 215 scaled[target] = value;
Chris@74 216 }
Chris@74 217
Chris@169 218 return scaled;
Chris@74 219 }
Chris@74 220