Chris@37
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@37
|
2
|
Chris@37
|
3 /*
|
Chris@37
|
4 Vamp feature extraction plugin using the MATCH audio alignment
|
Chris@37
|
5 algorithm.
|
Chris@37
|
6
|
Chris@37
|
7 Centre for Digital Music, Queen Mary, University of London.
|
Chris@37
|
8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
|
Chris@37
|
9
|
Chris@37
|
10 This program is free software; you can redistribute it and/or
|
Chris@37
|
11 modify it under the terms of the GNU General Public License as
|
Chris@37
|
12 published by the Free Software Foundation; either version 2 of the
|
Chris@37
|
13 License, or (at your option) any later version. See the file
|
Chris@37
|
14 COPYING included with this distribution for more information.
|
Chris@37
|
15 */
|
Chris@37
|
16
|
Chris@37
|
17 #include "FeatureExtractor.h"
|
Chris@37
|
18
|
Chris@37
|
19 #include <iostream>
|
Chris@37
|
20
|
Chris@37
|
21 #include <cstdlib>
|
Chris@37
|
22 #include <cassert>
|
Chris@37
|
23 #include <cmath>
|
Chris@37
|
24
|
Chris@37
|
25 using namespace std;
|
Chris@37
|
26
|
Chris@37
|
27 FeatureExtractor::FeatureExtractor(Parameters parameters) :
|
Chris@37
|
28 m_params(parameters),
|
Chris@37
|
29 m_ltAverage(0)
|
Chris@37
|
30 {
|
Chris@74
|
31 m_featureSize = getFeatureSizeFor(parameters);
|
Chris@37
|
32 m_prevFrame = vector<double>(m_featureSize, 0.0);
|
Chris@37
|
33
|
Chris@37
|
34 makeFreqMap();
|
Chris@37
|
35 }
|
Chris@37
|
36
|
Chris@74
|
37 int
|
Chris@74
|
38 FeatureExtractor::getFeatureSizeFor(Parameters parameters)
|
Chris@74
|
39 {
|
Chris@74
|
40 if (parameters.useChromaFrequencyMap) {
|
Chris@74
|
41 return 13;
|
Chris@74
|
42 } else {
|
Chris@74
|
43 return 84;
|
Chris@74
|
44 }
|
Chris@74
|
45 }
|
Chris@74
|
46
|
Chris@37
|
47 void
|
Chris@37
|
48 FeatureExtractor::makeFreqMap()
|
Chris@37
|
49 {
|
Chris@37
|
50 m_freqMap = vector<int>(m_params.fftSize / 2 + 1, 0);
|
Chris@37
|
51
|
Chris@37
|
52 if (m_params.useChromaFrequencyMap) {
|
Chris@37
|
53 #ifdef DEBUG_MATCHER
|
Chris@37
|
54 cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl;
|
Chris@37
|
55 #endif
|
Chris@37
|
56 makeChromaFrequencyMap();
|
Chris@37
|
57 } else {
|
Chris@37
|
58 #ifdef DEBUG_MATCHER
|
Chris@37
|
59 cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl;
|
Chris@37
|
60 #endif
|
Chris@37
|
61 makeStandardFrequencyMap();
|
Chris@37
|
62 }
|
Chris@37
|
63 }
|
Chris@37
|
64
|
Chris@37
|
65 void
|
Chris@37
|
66 FeatureExtractor::makeStandardFrequencyMap()
|
Chris@37
|
67 {
|
Chris@37
|
68 double binWidth = m_params.sampleRate / m_params.fftSize;
|
Chris@37
|
69 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1));
|
Chris@37
|
70 int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/
|
Chris@37
|
71 log(2.0) * 12 + 69);
|
Chris@37
|
72
|
Chris@37
|
73 // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth;
|
Chris@37
|
74
|
Chris@37
|
75 int i = 0;
|
Chris@37
|
76 while (i <= crossoverBin) {
|
Chris@37
|
77 m_freqMap[i] = i;
|
Chris@37
|
78 ++i;
|
Chris@37
|
79 }
|
Chris@37
|
80
|
Chris@37
|
81 while (i <= m_params.fftSize/2) {
|
Chris@37
|
82 double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69;
|
Chris@37
|
83 if (midi > 127) midi = 127;
|
Chris@40
|
84 int target = crossoverBin + lrint(midi) - crossoverMidi;
|
Chris@40
|
85 if (target >= m_featureSize) target = m_featureSize - 1;
|
Chris@40
|
86 m_freqMap[i++] = target;
|
Chris@37
|
87 }
|
Chris@37
|
88 }
|
Chris@37
|
89
|
Chris@37
|
90 void
|
Chris@37
|
91 FeatureExtractor::makeChromaFrequencyMap()
|
Chris@37
|
92 {
|
Chris@37
|
93 double binWidth = m_params.sampleRate / m_params.fftSize;
|
Chris@37
|
94 int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1));
|
Chris@37
|
95 int i = 0;
|
Chris@37
|
96 while (i <= crossoverBin) {
|
Chris@37
|
97 m_freqMap[i++] = 0;
|
Chris@37
|
98 }
|
Chris@37
|
99 while (i <= m_params.fftSize/2) {
|
Chris@37
|
100 double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69;
|
Chris@37
|
101 m_freqMap[i++] = (lrint(midi)) % 12 + 1;
|
Chris@37
|
102 }
|
Chris@37
|
103 }
|
Chris@37
|
104
|
Chris@37
|
105 vector<double>
|
Chris@37
|
106 FeatureExtractor::process(const vector<double> &real, const vector<double> &imag)
|
Chris@37
|
107 {
|
Chris@37
|
108 vector<double> frame(m_featureSize, 0.0);
|
Chris@37
|
109
|
Chris@37
|
110 double rms = 0;
|
Chris@37
|
111 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@37
|
112 double mag = real[i] * real[i] + imag[i] * imag[i];
|
Chris@37
|
113 rms += mag;
|
Chris@37
|
114 frame[m_freqMap[i]] += mag;
|
Chris@37
|
115 }
|
Chris@37
|
116 rms = sqrt(rms / (m_params.fftSize/2));
|
Chris@37
|
117
|
Chris@74
|
118 return postProcess(frame, rms);
|
Chris@74
|
119 }
|
Chris@74
|
120
|
Chris@74
|
121 vector<double>
|
Chris@74
|
122 FeatureExtractor::process(const float *cframe)
|
Chris@74
|
123 {
|
Chris@74
|
124 vector<double> frame(m_featureSize, 0.0);
|
Chris@74
|
125
|
Chris@74
|
126 double rms = 0;
|
Chris@74
|
127 for (int i = 0; i <= m_params.fftSize/2; i++) {
|
Chris@74
|
128 double mag = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1];
|
Chris@74
|
129 rms += mag;
|
Chris@74
|
130 frame[m_freqMap[i]] += mag;
|
Chris@74
|
131 }
|
Chris@74
|
132 rms = sqrt(rms / (m_params.fftSize/2));
|
Chris@74
|
133
|
Chris@74
|
134 return postProcess(frame, rms);
|
Chris@74
|
135 }
|
Chris@74
|
136
|
Chris@74
|
137 vector<double>
|
Chris@74
|
138 FeatureExtractor::postProcess(const vector<double> &frame, double rms)
|
Chris@74
|
139 {
|
Chris@37
|
140 vector<double> feature(m_featureSize, 0.0);
|
Chris@37
|
141
|
Chris@37
|
142 double totalEnergy = 0;
|
Chris@37
|
143 if (m_params.useSpectralDifference) {
|
Chris@37
|
144 for (int i = 0; i < m_featureSize; i++) {
|
Chris@37
|
145 totalEnergy += frame[i];
|
Chris@37
|
146 if (frame[i] > m_prevFrame[i]) {
|
Chris@37
|
147 feature[i] = frame[i] - m_prevFrame[i];
|
Chris@37
|
148 } else {
|
Chris@37
|
149 feature[i] = 0;
|
Chris@37
|
150 }
|
Chris@37
|
151 }
|
Chris@37
|
152 } else {
|
Chris@37
|
153 for (int i = 0; i < m_featureSize; i++) {
|
Chris@37
|
154 feature[i] = frame[i];
|
Chris@37
|
155 totalEnergy += feature[i];
|
Chris@37
|
156 }
|
Chris@37
|
157 }
|
Chris@37
|
158
|
Chris@37
|
159 if (m_ltAverage == 0) {
|
Chris@37
|
160 m_ltAverage = totalEnergy;
|
Chris@37
|
161 } else {
|
Chris@37
|
162 double decay = m_params.decay;
|
Chris@37
|
163 m_ltAverage = m_ltAverage * decay + totalEnergy * (1.0 - decay);
|
Chris@37
|
164 }
|
Chris@37
|
165
|
Chris@37
|
166 if (rms <= m_params.silenceThreshold) {
|
Chris@37
|
167 for (int i = 0; i < m_featureSize; i++) {
|
Chris@37
|
168 feature[i] = 0;
|
Chris@37
|
169 }
|
Chris@37
|
170 } else if (m_params.frameNorm == NormaliseFrameToSum1) {
|
Chris@37
|
171 for (int i = 0; i < m_featureSize; i++) {
|
Chris@37
|
172 feature[i] /= totalEnergy;
|
Chris@37
|
173 }
|
Chris@37
|
174 } else if (m_params.frameNorm == NormaliseFrameToLTAverage) {
|
Chris@37
|
175 for (int i = 0; i < m_featureSize; i++) {
|
Chris@37
|
176 feature[i] /= m_ltAverage;
|
Chris@37
|
177 }
|
Chris@37
|
178 }
|
Chris@37
|
179
|
Chris@37
|
180 m_prevFrame = frame;
|
Chris@37
|
181 return feature;
|
Chris@37
|
182 }
|
Chris@37
|
183
|