annotate src/FeatureExtractor.cpp @ 246:aac9ad4064ea subsequence tip

Fix incorrect handling of silent tail in the non-subsequence MATCH phase; some debug output changes
author Chris Cannam
date Fri, 24 Jul 2020 14:29:55 +0100
parents 39fe8728e1ca
children
rev   line source
Chris@37 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@37 2
Chris@37 3 /*
Chris@37 4 Vamp feature extraction plugin using the MATCH audio alignment
Chris@37 5 algorithm.
Chris@37 6
Chris@37 7 Centre for Digital Music, Queen Mary, University of London.
Chris@236 8 Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary
Chris@230 9 University of London, Copyright (c) 2014-2015 Tido GmbH.
Chris@37 10
Chris@37 11 This program is free software; you can redistribute it and/or
Chris@37 12 modify it under the terms of the GNU General Public License as
Chris@37 13 published by the Free Software Foundation; either version 2 of the
Chris@37 14 License, or (at your option) any later version. See the file
Chris@37 15 COPYING included with this distribution for more information.
Chris@37 16 */
Chris@37 17
Chris@37 18 #include "FeatureExtractor.h"
Chris@37 19
Chris@37 20 #include <iostream>
Chris@37 21
Chris@37 22 #include <cstdlib>
Chris@37 23 #include <cassert>
Chris@37 24 #include <cmath>
Chris@37 25
Chris@37 26 using namespace std;
Chris@37 27
Chris@174 28 //#define DEBUG_FEATURE_EXTRACTOR 1
Chris@140 29
Chris@37 30 FeatureExtractor::FeatureExtractor(Parameters parameters) :
Chris@103 31 m_params(parameters)
Chris@37 32 {
Chris@74 33 m_featureSize = getFeatureSizeFor(parameters);
Chris@37 34 makeFreqMap();
Chris@140 35
Chris@140 36 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@140 37 cerr << "*** FeatureExtractor: sampleRate = " << parameters.sampleRate
Chris@140 38 << ", useChromaFrequencyMap = " << parameters.useChromaFrequencyMap
Chris@140 39 << ", fftSize = " << parameters.fftSize << endl;
Chris@140 40 #endif
Chris@37 41 }
Chris@37 42
Chris@74 43 int
Chris@74 44 FeatureExtractor::getFeatureSizeFor(Parameters parameters)
Chris@74 45 {
Chris@74 46 if (parameters.useChromaFrequencyMap) {
Chris@74 47 return 13;
Chris@74 48 } else {
Chris@74 49 return 84;
Chris@74 50 }
Chris@74 51 }
Chris@74 52
Chris@37 53 void
Chris@37 54 FeatureExtractor::makeFreqMap()
Chris@37 55 {
Chris@37 56 m_freqMap = vector<int>(m_params.fftSize / 2 + 1, 0);
Chris@37 57
Chris@37 58 if (m_params.useChromaFrequencyMap) {
Chris@140 59 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@37 60 cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl;
Chris@37 61 #endif
Chris@37 62 makeChromaFrequencyMap();
Chris@37 63 } else {
Chris@140 64 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@37 65 cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl;
Chris@37 66 #endif
Chris@37 67 makeStandardFrequencyMap();
Chris@37 68 }
Chris@37 69 }
Chris@37 70
Chris@37 71 void
Chris@37 72 FeatureExtractor::makeStandardFrequencyMap()
Chris@37 73 {
Chris@169 74 // Our handling of the referenceFrequency parameter depends on the
Chris@169 75 // frequency map in use.
Chris@169 76
Chris@169 77 // With the chroma frequency map, we use referenceFrequency to set
Chris@169 78 // up the chroma bin frequencies when constructing the map, and
Chris@169 79 // then just follow the map (without having to refer to
Chris@169 80 // referenceFrequency again) when we get the frequency-domain
Chris@169 81 // audio.
Chris@169 82
Chris@169 83 // With the standard frequency map, using referenceFrequency to
Chris@169 84 // set up the map doesn't work so well -- it only really affects
Chris@169 85 // the crossover frequency, and much of the useful information is
Chris@169 86 // below that frequency. What we do instead is to ignore the
Chris@169 87 // referenceFrequency when creating the map -- setting it up for
Chris@169 88 // 440Hz -- and then use it to scale the individual
Chris@169 89 // frequency-domain audio frames before applying the map to them.
Chris@169 90
Chris@169 91 double refFreq = 440.; // See above -- *not* the parameter!
Chris@180 92 double binWidth = double(m_params.sampleRate) / m_params.fftSize;
Chris@188 93 int crossoverBin = int(2 / (pow(2, 1/12.0) - 1));
Chris@180 94 int crossoverMidi = int(log(crossoverBin * binWidth / refFreq)/
Chris@180 95 log(2.0) * 12 + 69 + 0.5);
Chris@163 96
Chris@37 97 int i = 0;
Chris@37 98 while (i <= crossoverBin) {
Chris@176 99 double freq = i * binWidth;
Chris@176 100 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
Chris@176 101 m_freqMap[i++] = -1;
Chris@176 102 } else {
Chris@176 103 m_freqMap[i] = i;
Chris@176 104 i++;
Chris@176 105 }
Chris@37 106 }
Chris@37 107
Chris@37 108 while (i <= m_params.fftSize/2) {
Chris@176 109 double freq = i * binWidth;
Chris@176 110 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
Chris@176 111 m_freqMap[i++] = -1;
Chris@176 112 } else {
Chris@176 113 double midi = log(freq / refFreq) / log(2.0) * 12 + 69;
Chris@176 114 if (midi > 127) midi = 127;
Chris@180 115 int target = crossoverBin + int(midi + 0.5) - crossoverMidi;
Chris@176 116 if (target >= m_featureSize) target = m_featureSize - 1;
Chris@176 117 m_freqMap[i++] = target;
Chris@176 118 }
Chris@37 119 }
Chris@166 120
Chris@166 121 #ifdef DEBUG_FEATURE_EXTRACTOR
Chris@166 122 cerr << "FeatureExtractor: crossover bin is " << crossoverBin << " for midi "
Chris@166 123 << crossoverMidi << endl;
Chris@176 124 cerr << "FeatureExtractor: map is:" << endl;
Chris@176 125 for (i = 0; i <= m_params.fftSize/2; ++i) {
Chris@176 126 cerr << i << ": " << m_freqMap[i] << ", ";
Chris@176 127 }
Chris@176 128 cerr << endl;
Chris@166 129 #endif
Chris@37 130 }
Chris@37 131
Chris@37 132 void
Chris@37 133 FeatureExtractor::makeChromaFrequencyMap()
Chris@37 134 {
Chris@159 135 double refFreq = m_params.referenceFrequency;
Chris@180 136 double binWidth = double(m_params.sampleRate) / m_params.fftSize;
Chris@188 137 int crossoverBin = int(1 / (pow(2, 1/12.0) - 1));
Chris@37 138 int i = 0;
Chris@37 139 while (i <= crossoverBin) {
Chris@176 140 double freq = i * binWidth;
Chris@176 141 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
Chris@176 142 m_freqMap[i++] = -1;
Chris@176 143 } else {
Chris@176 144 m_freqMap[i++] = 0;
Chris@176 145 }
Chris@37 146 }
Chris@37 147 while (i <= m_params.fftSize/2) {
Chris@176 148 double freq = i * binWidth;
Chris@176 149 if (freq < m_params.minFrequency || freq > m_params.maxFrequency) {
Chris@176 150 m_freqMap[i++] = -1;
Chris@176 151 } else {
Chris@176 152 double midi = log(freq / refFreq) / log(2.0) * 12 + 69;
Chris@180 153 m_freqMap[i++] = (int(midi + 0.5)) % 12 + 1;
Chris@176 154 }
Chris@37 155 }
Chris@37 156 }
Chris@37 157
Chris@183 158 feature_t
Chris@37 159 FeatureExtractor::process(const vector<double> &real, const vector<double> &imag)
Chris@37 160 {
Chris@184 161 vector<float> mags(m_params.fftSize/2 + 1, 0.0);
Chris@184 162
Chris@184 163 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@184 164 mags[i] = float(real[i] * real[i] + imag[i] * imag[i]);
Chris@184 165 }
Chris@184 166
Chris@184 167 return processMags(mags);
Chris@184 168 }
Chris@184 169
Chris@184 170 feature_t
Chris@184 171 FeatureExtractor::process(const vector<float> &real, const vector<float> &imag)
Chris@184 172 {
Chris@184 173 vector<float> mags(m_params.fftSize/2 + 1, 0.0);
Chris@169 174
Chris@169 175 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@169 176 mags[i] = real[i] * real[i] + imag[i] * imag[i];
Chris@169 177 }
Chris@169 178
Chris@169 179 return processMags(mags);
Chris@169 180 }
Chris@169 181
Chris@183 182 feature_t
Chris@201 183 FeatureExtractor::process(const float *real, const float *imag)
Chris@201 184 {
Chris@201 185 vector<float> mags(m_params.fftSize/2 + 1, 0.0);
Chris@201 186
Chris@201 187 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@201 188 mags[i] = real[i] * real[i] + imag[i] * imag[i];
Chris@201 189 }
Chris@201 190
Chris@201 191 return processMags(mags);
Chris@201 192 }
Chris@201 193
Chris@201 194 feature_t
Chris@169 195 FeatureExtractor::process(const float *cframe)
Chris@169 196 {
Chris@184 197 vector<float> mags(m_params.fftSize/2 + 1, 0.0);
Chris@169 198
Chris@169 199 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@169 200 mags[i] = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1];
Chris@169 201 }
Chris@169 202
Chris@169 203 return processMags(mags);
Chris@169 204 }
Chris@169 205
Chris@183 206 feature_t
Chris@184 207 FeatureExtractor::processMags(const vector<float> &mags)
Chris@169 208 {
Chris@183 209 feature_t frame(m_featureSize, 0.0);
Chris@169 210
Chris@169 211 if (!m_params.useChromaFrequencyMap &&
Chris@169 212 (m_params.referenceFrequency != 440.)) {
Chris@169 213
Chris@169 214 // See comment in makeStandardFrequencyMap above
Chris@184 215 vector<float> scaled = scaleMags(mags);
Chris@169 216
Chris@169 217 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@178 218 int index = m_freqMap[i];
Chris@178 219 if (index >= 0) {
Chris@178 220 frame[index] += scaled[i];
Chris@178 221 }
Chris@169 222 }
Chris@169 223
Chris@169 224 } else {
Chris@169 225 for (int i = 0; i <= m_params.fftSize/2; i++) {
Chris@178 226 int index = m_freqMap[i];
Chris@178 227 if (index >= 0) {
Chris@178 228 frame[index] += mags[i];
Chris@178 229 }
Chris@176 230 }
Chris@37 231 }
Chris@37 232
Chris@103 233 return frame;
Chris@74 234 }
Chris@74 235
Chris@184 236 vector<float>
Chris@184 237 FeatureExtractor::scaleMags(const vector<float> &mags)
Chris@74 238 {
Chris@169 239 // Scale the pitch content in the given magnitude spectrum to
Chris@169 240 // accommodate a difference in tuning frequency (between the 440Hz
Chris@169 241 // reference and the actual tuning frequency of the input audio).
Chris@169 242 // We only do this when not using chroma features -- see the
Chris@169 243 // comment in makeStandardFrequencyMap() above.
Chris@169 244
Chris@169 245 if (m_params.useChromaFrequencyMap) return mags;
Chris@169 246
Chris@184 247 double ratio = 440.f / m_params.referenceFrequency;
Chris@169 248
Chris@180 249 int n = static_cast<int>(mags.size());
Chris@169 250
Chris@184 251 vector<float> scaled(n, 0.0);
Chris@169 252
Chris@169 253 for (int target = 0; target < n; ++target) {
Chris@169 254
Chris@169 255 double source = target / ratio;
Chris@169 256
Chris@169 257 int lower = int(source);
Chris@169 258 int higher = lower + 1;
Chris@169 259
Chris@169 260 double lowerProp = higher - source;
Chris@169 261 double higherProp = source - lower;
Chris@169 262
Chris@169 263 double value = 0.0;
Chris@169 264 if (lower >= 0 && lower < n) {
Chris@169 265 value += lowerProp * mags[lower];
Chris@176 266 }
Chris@169 267 if (higher >= 0 && higher < n) {
Chris@169 268 value += higherProp * mags[higher];
Chris@169 269 }
Chris@169 270
Chris@184 271 scaled[target] = float(value);
Chris@74 272 }
Chris@74 273
Chris@169 274 return scaled;
Chris@74 275 }
Chris@74 276